<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>arXiv 每日论文精选</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <link href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" rel="stylesheet">
    <link rel="stylesheet" href="static/styles.css?v=1761467343">
    <script src="static/tailwind.config.js"></script>

    <style>
        /* Tiered collapsing: level-1 hides only a card's details, level-2 hides the whole element. */
        .collapsed-level-1 .paper-details {
            display: none;
        }

        .collapsed-level-2 {
            display: none !important;
        }

        /* Expand/collapse icon */
        .expand-icon {
            display: inline-block;
            width: 20px;
            text-align: center;
            margin-right: 5px;
        }

        /* Expand/collapse toggle button */
        .expand-toggle {
            cursor: pointer;
            padding: 8px 12px;
            background-color: #f3f4f6;
            border: 1px solid #e5e7eb;
            border-radius: 6px;
            margin-bottom: 16px;
            text-align: center;
            font-weight: 500;
            color: #4b5563;
            transition: all 0.2s ease;
        }

        .expand-toggle:hover {
            background-color: #e5e7eb;
        }

        /* Divider line between groups of papers */
        .papers-divider {
            height: 1px;
            background-color: #e5e7eb;
            margin: 20px 0;
            position: relative;
        }

        /* Label centered on top of the divider line */
        .papers-divider-label {
            position: absolute;
            left: 50%;
            top: 50%;
            transform: translate(-50%, -50%);
            background-color: white;
            padding: 0 12px;
            color: #9ca3af;
            font-size: 14px;
            cursor: pointer;
        }

        .papers-divider-label:hover {
            color: #4b5563;
        }

        /* Expanded state: re-show level-1 details (wins on selector specificity alone). */
        .expanded-all .collapsed-level-1 .paper-details {
            display: block;
        }

        /*
         * Expanded state: re-show level-2 elements.
         * The base .collapsed-level-2 rule hides with !important, and a normal
         * declaration can never override an important one regardless of
         * specificity, so these overrides must be !important as well.
         */
        .expanded-all .collapsed-level-2,
        .expanded-level-2 .collapsed-level-2 {
            display: block !important;
        }
    </style>
    </head>

<body class="bg-gray-50 font-sans text-dark">
    <!-- Sticky top bar: site title, date picker, and summary statistics -->
    <header class="bg-white shadow-sm sticky top-0 z-10 border-b border-gray-200">
        <div class="container mx-auto px-4 py-4">
            <div class="flex flex-col md:flex-row justify-between items-start md:items-center mb-3">
                <div class="flex items-center">
                    <i class="fa fa-book text-primary text-xl mr-2" aria-hidden="true"></i>
                    <h1 class="text-lg md:text-xl font-bold text-gray-800">arXiv 每日论文精选</h1>
                </div>
                <div class="flex items-center mt-2 md:mt-0">
                    <span id="current-date" class="text-gray-600 text-sm">
                        <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>2025-10-10
                    </span>
                    <div class="ml-3 relative" id="date-picker-container">
                        <!-- type="button" is explicit so the toggle can never act as a form submit -->
                        <button type="button" id="date-picker-toggle" class="bg-light border border-gray-300 text-gray-700 py-1 px-3 pr-6 rounded text-sm leading-tight focus:outline-none focus:bg-white inline-flex items-center">
                            <i class="fa fa-calendar mr-2" aria-hidden="true"></i>
                            <span id="selected-date-text">2025-10-10</span>
                            <i class="fa fa-chevron-down ml-2 text-xs" aria-hidden="true"></i>
                        </button>
                        <div id="date-picker" class="hidden absolute right-0 mt-1 bg-white border border-gray-300 rounded shadow-lg p-2 z-20 w-56">
                            <div class="flex justify-between items-center mb-2">
                                <!-- Icon-only buttons: aria-label supplies the accessible name -->
                                <button type="button" id="prev-month" class="text-gray-500 hover:text-gray-700" aria-label="上个月"><i class="fa fa-chevron-left" aria-hidden="true"></i></button>
                                <h4 id="current-month">2025-10-10</h4>
                                <button type="button" id="next-month" class="text-gray-500 hover:text-gray-700" aria-label="下个月"><i class="fa fa-chevron-right" aria-hidden="true"></i></button>
                            </div>
                            <!-- Weekday column headings, Sunday first -->
                            <div class="grid grid-cols-7 gap-1 text-center text-xs mb-1">
                                <div class="text-gray-500">日</div>
                                <div class="text-gray-500">一</div>
                                <div class="text-gray-500">二</div>
                                <div class="text-gray-500">三</div>
                                <div class="text-gray-500">四</div>
                                <div class="text-gray-500">五</div>
                                <div class="text-gray-500">六</div>
                            </div>
                            <div id="calendar-grid" class="grid grid-cols-7 gap-1 text-center text-sm">
                                <!-- Calendar day cells are generated dynamically by JavaScript -->
                            </div>
                        </div>
                    </div>
                </div>
            </div>

            <!-- Summary statistics -->
            <div class="flex flex-wrap gap-4 text-sm">
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-file-text-o" aria-hidden="true"></i> 总论文数:</span>
                    <span id="total-papers" class="font-semibold text-primary">198</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-star" aria-hidden="true"></i> 精选论文数:</span>
                    <span id="selected-papers" class="font-semibold text-accent">20</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-line-chart" aria-hidden="true"></i> 平均评分:</span>
                    <span id="avg-score" class="font-semibold text-secondary">2.8</span>
                </div>
            </div>
        </div>
    </header>

    <!-- 主内容区 -->
    <main class="container mx-auto px-4 py-5">
        <!-- Filter bar: displayed-paper count on the left, all/selected toggle buttons on the right -->
        <div class="mb-4 flex flex-col sm:flex-row justify-between items-start sm:items-center">
            <div class="text-gray-700 text-sm mb-2 sm:mb-0">
                <span id="display-count" class="font-medium">显示 198 篇论文 (共 198 篇)</span>
            </div>
            <div class="flex space-x-2">
                <!-- type="button" is explicit: a bare <button> defaults to type="submit" -->
                <button type="button" id="show-all"
                    class="px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors">
                    全部论文
                </button>
                <button type="button" id="show-selected"
                    class="px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors">
                    仅显示精选
                </button>
            </div>
        </div>

        <!-- 论文列表 -->
        <div id="papers-container" class="grid grid-cols-1 gap-4">
            
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08048v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>TaoSR-AGRL：面向电商搜索相关性的自适应引导强化学习框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            TaoSR-AGRL: Adaptive Guided Reinforcement Learning Framework for E-commerce Search Relevance
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jianhui Yang, Yiming Jin, Pengkun Jiao, Chenhe Dong, Zerui Huang, Shaowei Yao, X...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究电商搜索中LLM相关性预测的推理能力不足问题，核心思想是通过规则感知奖励塑造和自适应引导回放机制，为多步推理提供密集的结构化奖励和针对性指导。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对电商搜索相关性预测这一核心问题，提出了结合强化学习和LLM的创新框架，完美契合搜索领域的技术前沿需求。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T10:34:39">2025-10-09 10:34:39</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08048v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08048v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Query-product relevance prediction is fundamental to e-commerce search and has become even more critical in the era of AI-powered shopping, where semantic understanding and complex reasoning directly shape the user experience and business conversion. Large Language Models (LLMs) enable generative, reasoning-based approaches, typically aligned via supervised fine-tuning (SFT) or preference optimization methods like Direct Preference Optimization (DPO). However, the increasing complexity of business rules and user queries exposes the inability of existing methods to endow models with robust reasoning capacity for long-tail and challenging cases. Efforts to address this via reinforcement learning strategies like Group Relative Policy Optimization (GRPO) often suffer from sparse terminal rewards, offering insufficient guidance for multi-step reasoning and slowing convergence. To address these challenges, we propose TaoSR-AGRL, an Adaptive Guided Reinforcement Learning framework for LLM-based relevance prediction in Taobao Search Relevance. TaoSR-AGRL introduces two key innovations: (1) Rule-aware Reward Shaping, which decomposes the final relevance judgment into dense, structured rewards aligned with domain-specific relevance criteria; and (2) Adaptive Guided Replay, which identifies low-accuracy rollouts during training and injects targeted ground-truth guidance to steer the policy away from stagnant, rule-violating reasoning patterns toward compliant trajectories. TaoSR-AGRL was evaluated on large-scale real-world datasets and through online side-by-side human evaluations on Taobao Search. It consistently outperforms DPO and standard GRPO baselines in offline experiments, improving relevance accuracy, rule adherence, and training stability. The model trained with TaoSR-AGRL has been successfully deployed in the main search scenario on Taobao, serving hundreds of millions of users.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07885v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>使用大型语言模型生成和标注电子商务中的物品使用场景
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Generation and annotation of item usage scenarios in e-commerce using large language models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Madoka Hagiri, Kazushi Okamoto, Koki Karube, Kei Harada, Atsushi Shibata
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究电商中互补推荐的主观性和个性化难题，核心思想是利用LLM生成具体的物品使用场景，通过情境想象来理解用户组合物品的动机。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接应用LLM技术生成电商场景下的物品使用情境，为核心推荐系统提供新的数据构建方法，与用户关注点高度契合。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T07:37:54">2025-10-09 07:37:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07885v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07885v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Complementary recommendations suggest combinations of useful items that play important roles in e-commerce. However, complementary relationships are often subjective and vary among individuals, making them difficult to infer from historical data. Unlike conventional history-based methods that rely on statistical co-occurrence, we focus on the underlying usage context that motivates item combinations. We hypothesized that people select complementary items by imagining specific usage scenarios and identifying the needs in such situations. Based on this idea, we explored the use of large language models (LLMs) to generate item usage scenarios as a starting point for constructing complementary recommendation systems. First, we evaluated the plausibility of LLM-generated scenarios through manual annotation. The results demonstrated that approximately 85% of the generated scenarios were determined to be plausible, suggesting that LLMs can effectively generate realistic item usage scenarios.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07784v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>PLUM：为工业级生成式推荐系统适配预训练语言模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            PLUM: Adapting Pre-trained Language Models for Industrial-scale Generative Recommendations
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruining He, Lukasz Heldt, Lichan Hong, Raghunandan Keshavan, Shifan Mao, Nikhil ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究如何将预训练语言模型适配到工业级推荐任务；核心方法是构建包含项目语义ID化、领域持续预训练和生成式检索微调的完整框架，使模型能直接生成推荐项目的语义ID。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM在推荐系统中的应用，提出了完整的工业级生成式推荐框架，完全符合核心关注领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T05:01:05">2025-10-09 05:01:05</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07784v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07784v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) pose a new paradigm of modeling and computation for information tasks. Recommendation systems are a critical application domain poised to benefit significantly from the sequence modeling capabilities and world knowledge inherent in these large models. In this paper, we introduce PLUM, a framework designed to adapt pre-trained LLMs for industry-scale recommendation tasks. PLUM consists of item tokenization using Semantic IDs, continued pre-training (CPT) on domain-specific data, and task-specific fine-tuning for recommendation objectives. For fine-tuning, we focus particularly on generative retrieval, where the model is directly trained to generate Semantic IDs of recommended items based on user context. We conduct comprehensive experiments on large-scale internal video recommendation datasets. Our results demonstrate that PLUM achieves substantial improvements for retrieval compared to a heavily-optimized production model built with large embedding tables. We also present a scaling study for the model's retrieval performance, our learnings about CPT, a few enhancements to Semantic IDs, along with an overview of the training and inference methods that enable launching this framework to billions of users in YouTube.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08525v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>哪些注意力头对推理至关重要？基于强化学习的KV缓存压缩
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Which Heads Matter for Reasoning? RL-Guided KV Cache Compression
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wenjie Du, Li Jiang, Keda Tao, Xue Liu, Huan Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究推理大语言模型中KV缓存压缩的核心问题，提出基于强化学习识别推理关键注意力头的方法，仅对关键头保留完整缓存而对其他头进行压缩，实现高效推理。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM推理效率问题，提出基于强化学习的注意力头重要性识别方法，对推荐系统和搜索中的高效推理具有重要应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:50:00">2025-10-09 17:50:00</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08525v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08525v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reasoning large language models exhibit complex reasoning behaviors through the extended chain-of-thought generation, creating unprecedented Key-Value (KV) cache overhead during the decoding phase. Existing KV cache compression methods underperform on reasoning models: token-dropping methods break reasoning integrity by discarding critical information, while head-reallocating methods mistakenly compress reasoning-critical heads since they are designed for retrieval tasks, resulting in significant performance degradation as compression rates increase. We hypothesize that KV heads exhibit functional heterogeneity in reasoning models-some heads are critical for chain-of-thought consistency while others are compressible. To validate and exploit this insight, we propose RLKV, a novel reasoning-critical head identification framework, which uses reinforcement learning to directly optimize the relationship between each head's cache usage and reasoning quality. As RLKV produces rewards from actual generated samples during training, it naturally identifies heads relevant to reasoning behaviors. We then allocate full KV cache to these heads while applying compressed constant KV cache to others for efficient inference. Our experiments reveal that only a small fraction of attention heads is essential for reasoning, enabling our KV compression approach to outperform baseline methods while achieving 20-50% cache reduction with near lossless performance compared to uncompressed results.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08439v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>xRouter：基于强化学习的训练成本感知大语言模型编排系统
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            xRouter: Training Cost-Aware LLMs Orchestration System via Reinforcement Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cheng Qian, Zuxin Liu, Shirley Kokane, Akshara Prabhakar, Jielin Qiu, Haolin Che...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何优化LLM部署中的成本-性能权衡问题，核心方法是使用强化学习训练一个能够智能调用不同成本模型的动态路由系统，通过成本感知的奖励函数实现端到端的编排决策。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM编排系统的成本效率问题，使用强化学习实现动态路由，完全符合直接LLM应用和核心推荐系统优化的研究方向。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:52:01">2025-10-09 16:52:01</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08439v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08439v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Modern LLM deployments confront a widening cost-performance spectrum: premium models deliver strong reasoning but are expensive, while lightweight models are economical yet brittle on complex tasks. Static escalation rules and keyword heuristics under-utilize this spectrum and fail to adapt across task types. We present xRouter, a tool-calling-based routing system in which a learned router can either answer directly or invoke one or more external models. The router is trained end-to-end with reinforcement learning using an explicit, cost-aware reward that encodes cost-performance trade-offs, eliminating the need for hand-engineered routing rules. Our implementation encompasses the full reinforcement learning framework, including reward and cost accounting, as well as the deployment and evaluation pipelines. Across diverse benchmarks, xRouter achieves strong cost-performance trade-offs (e.g., substantial cost reductions at comparable task completion rates), and provides empirical insights into what reliably helps learned routing and what does not, ranging from model trainability to the difficulty of eliciting sophisticated orchestration behaviors in small open models. We hope these findings and our open implementation will serve as a practical substrate for advancing learned, cost-aware LLM orchestration.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Card header: translated title (links to alphaXiv) and score badge -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08404v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>单层微型Co⁴模型性能超越GPT-2和GPT-BERT
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts carry lang="en" because the document language is zh-CN -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Single layer tiny Co$^4$ outpaces GPT-2 and GPT-BERT
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Noor Ul Zain, Mohsin Raza, Ahsan Adeel
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何通过更高效的Transformer架构解决传统模型计算复杂度高的问题，核心思想是提出单层Co⁴架构，仅需8M参数和线性计算复杂度就能实现与传统深层模型相当的性能。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种高效的Transformer架构Co⁴，具有线性复杂度O(N)，直接对应Transformer架构效率提升的核心研究方向，对推荐系统和搜索中的大规模序列处理具有重要价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:22:30">2025-10-09 16:22:30</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08404v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08404v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We show that a tiny Co$^4$ machine(Adeel,2025) with a single layer, two heads, and 8M parameters, operating at an approximate cost of $O(N)$ (where $N$ is the number of input tokens), outpaces the BabyLM Challenge baselines GPT-2 (124M, 12 layers, $O(N^2))$ and GPT-BERT (30M, 12 layers, $O(N^2))$ in just two epochs, while both are trained for ten. Co$^4$ achieves orders-of-magnitude greater training efficiency on 10M tokens, demonstrating highly sample efficient pretraining. Using the BabyLM challenge evaluation pipeline across complex benchmarks, Co$^4$ exhibits strong zero-shot and fine-tuning performance on SuperGLUE tasks. Specifically, Co$^4$ outperforms GPT-2 on 5 out of 7 zero-shot metrics and 6 out of 7 fine-tuning tasks, and GPT-BERT on 4 out of 7 metrics in both cases. These results suggest the need to rethink prevailing deep learning paradigms and associated scaling laws.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08396v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>FlyLoRA：通过隐式秩级专家混合提升任务解耦与参数效率
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FlyLoRA: Boosting Task Decoupling and Parameter Efficiency via Implicit Rank-Wise Mixture-of-Experts
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Heming Zou, Yunliang Zang, Wutong Xu, Yao Zhu, Xiangyang Ji
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究LoRA在多任务微调中的参数干扰问题，核心思想是设计基于果蝇嗅觉回路的隐式MoE架构，通过秩级专家激活和随机矩阵投影统一专家路由与降维投影，消除显式路由器并利用随机矩阵正交性减轻任务间干扰。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文在Transformer架构效率方面提出创新方法，通过隐式MoE和随机矩阵正交性解决LoRA参数干扰问题，对推荐系统多任务学习具有直接应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T16:17:13">2025-10-09 16:17:13</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08396v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08396v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Low-Rank Adaptation (LoRA) is a widely used parameter-efficient fine-tuning method for foundation models, but it suffers from parameter interference, resulting in suboptimal performance. Although Mixture-of-Experts (MoE)-based LoRA variants show promise in mitigating intra-task correlations in single-task instruction tuning, they introduce additional router parameters and remain ineffective in multi-task model merging where inter-task interference arises. Inspired by the fly olfactory circuit, we propose FlyLoRA, an implicit MoE-based LoRA variant that introduces: (1) rank-wise expert activation in the up-projection matrix, and (2) an implicit router that unifies expert routing and down-projection, where a frozen sparse random projection matrix replaces the traditional dense trainable version. This design resolves the trade-off between intra-task decorrelation and computational efficiency by eliminating the need for an explicit router, while inherently mitigating inter-task interference due to the orthogonality property of random matrices. Extensive experiments across four domains -- general knowledge understanding, scientific question answering, mathematical reasoning, and code generation -- demonstrate consistent performance improvements over existing methods. Beyond empirical gains, FlyLoRA highlights how biological structures can inspire innovations in AI technologies. Code is available at https://github.com/gfyddha/FlyLoRA.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08256v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>混合与专家直接偏好优化：一种基于变分推断的直接偏好优化方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Mix- and MoE-DPO: A Variational Inference Approach to Direct Preference Optimization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jason Bohne, Pawel Polak, David Rosenberg, Brian Bloniarz, Gary Kazantsev
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究标准DPO方法在异构偏好分布和多任务场景下的表达能力限制问题，核心思想是通过变分推理框架将软混合模型和MoE架构引入DPO，实现专家策略的专业化和上下文相关的混合策略。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文将MoE架构与DPO对齐方法结合，直接针对Transformer效率和多任务学习优化，在LLM对齐和专家混合技术方面都有核心贡献。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T14:15:14">2025-10-09 14:15:14</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08256v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08256v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Direct Preference Optimization (DPO) has recently emerged as a simple and effective alternative to reinforcement learning from human feedback (RLHF) for aligning large language models (LLMs) with user preferences. However, existing DPO formulations rely on a single monolithic model, which limits their expressivity in multi-task settings and their adaptability to heterogeneous or diverse preference distributions. In this work, we propose Mix- and MoE-DPO, a framework that extends DPO with both soft mixture models and mixture-of-experts (MoE) architectures, using a stochastic variational inference approach. Our method introduces a latent-variable model over expert assignments and optimizes a variational evidence lower bound (ELBO), enabling stable and efficient learning of specialized expert policies from preference data. Mix- and MoE-DPO provides three key advantages over standard DPO: (i) generalization via universal function approximation through mixtures; (ii) reward and policy specialization through expert components tailored to distinct preference modes; and (iii) contextual alignment through input-dependent soft gating that enables user-specific mixture policies. Our framework supports both shared base architectures with expert-specific policy heads and fully independent expert models, allowing flexible trade-offs between parameter efficiency and specialization. We validate our approach on a variety of model sizes and multi-preference datasets, demonstrating that Mix- and MoE-DPO offers a powerful and scalable method for preference-based LLM alignment.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08203v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>通过函数令牌实现大语言模型中的记忆检索与巩固
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Memory Retrieval and Consolidation in Large Language Models through Function Tokens
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shaohua Zhang, Yuan Lin, Hang Li
        </div>

        <!-- The summaries use 记忆 for "memory", matching the card title. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究LLM中记忆检索与巩固的机制问题，核心思想是提出函数令牌假说：函数令牌在推理时激活上下文中最具预测性的特征进行记忆检索，在预训练时通过预测后续内容令牌来驱动记忆巩固。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出函数令牌假说，深入揭示LLM记忆检索与巩固机制，这对理解LLM内部工作原理及提升推荐系统、搜索等领域的模型透明度与可控性具有重要价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T13:31:20">2025-10-09 13:31:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08203v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08203v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The remarkable success of large language models (LLMs) stems from their ability to consolidate vast amounts of knowledge into the memory during pre-training and to retrieve it from the memory during inference, enabling advanced capabilities such as knowledge memorization, instruction-following and reasoning. However, the mechanisms of memory retrieval and consolidation in LLMs remain poorly understood. In this paper, we propose the function token hypothesis to explain the workings of LLMs: During inference, function tokens activate the most predictive features from context and govern next token prediction (memory retrieval). During pre-training, predicting the next tokens (usually content tokens) that follow function tokens increases the number of learned features of LLMs and updates the model parameters (memory consolidation). Function tokens here roughly correspond to function words in linguistics, including punctuation marks, articles, prepositions, and conjunctions, in contrast to content tokens. We provide extensive experimental evidence supporting this hypothesis. Using bipartite graph analysis, we show that a small number of function tokens activate the majority of features. Case studies further reveal how function tokens activate the most predictive features from context to direct next token prediction. We also find that during pre-training, the training loss is dominated by predicting the next content tokens following function tokens, which forces the function tokens to select the most predictive features from context.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07923v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>STEPER：通过分步知识蒸馏增强多步检索增强语言模型推理能力
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            STEPER: Step-wise Knowledge Distillation for Enhancing Reasoning Ability in Multi-Step Retrieval-Augmented Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kyumin Lee, Minjin Jeon, Sanghwan Jang, Hwanjo Yu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究多步检索增强语言模型中推理能力的提升问题，核心思想是通过分步监督和难度感知训练，针对不同推理阶段的信息需求和能力差异进行知识蒸馏。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文针对多步检索增强语言模型提出分步知识蒸馏方法，直接关联LLM在复杂推理任务中的应用，与推荐和搜索系统中的多步推理需求高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T08:20:27">2025-10-09 08:20:27</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07923v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07923v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Answering complex real-world questions requires step-by-step retrieval and integration of relevant information to generate well-grounded responses. However, existing knowledge distillation methods overlook the need for different reasoning abilities at different steps, hindering transfer in multi-step retrieval-augmented frameworks. To address this, we propose Stepwise Knowledge Distillation for Enhancing Reasoning Ability in Multi-Step Retrieval-Augmented Language Models (StepER). StepER employs step-wise supervision to align with evolving information and reasoning demands across stages. Additionally, it incorporates difficulty-aware training to progressively optimize learning by prioritizing suitable steps. Our method is adaptable to various multi-step retrieval-augmented language models, including those that use retrieval queries for reasoning paths or decomposed questions. Extensive experiments show that StepER outperforms prior methods on multi-hop QA benchmarks, with an 8B model achieving performance comparable to a 70B teacher model.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08281v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>通过目标分解与重构的移动游戏玩家终身价值预测
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Mobile Gamer Lifetime Value Prediction via Objective Decomposition and Reconstruction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianwei Li, Yu Zhao, Yunze Li, Sheng Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究移动游戏广告中用户终身价值预测的分布复杂性挑战，核心思想是通过将LTV预测分解为特定价格交易次数预测，然后重构总支付金额来应对分布异常值问题。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对广告系统中的用户终身价值预测问题，提出了分解重构的建模方法，与广告领域优化和推荐系统核心挑战高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T14:33:12">2025-10-09 14:33:12</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08281v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08281v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                For Internet platforms operating real-time bidding (RTB) advertising service, a comprehensive understanding of user lifetime value (LTV) plays a pivotal role in optimizing advertisement allocation efficiency and maximizing the return on investment (ROI) for advertisement sponsors, thereby facilitating growth of commercialization revenue for the platform. However, the inherent complexity of user LTV distributions induces significant challenges in accurate LTV prediction. Existing state-of-the-art works, which primarily focus on directly learning the LTV distributions through well-designed loss functions, achieve limited success due to their vulnerability to outliers. In this paper, we proposed a novel LTV prediction method to address distribution challenges through an objective decomposition and reconstruction framework. Briefly speaking, based on the in-app purchase characteristics of mobile gamers, our model was designed to first predict the number of transactions at specific prices and then calculate the total payment amount from these intermediate predictions. Our proposed model was evaluated through experiments on real-world industrial dataset, and deployed on the TapTap RTB advertising system for online A/B testing along with the state-of-the-art ZILN model.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08252v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>ReasonEmbed：用于推理密集型文档检索的增强文本嵌入
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ReasonEmbed: Enhanced Text Embeddings for Reasoning-Intensive Document Retrieval
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jianlyu Chen, Junwei Lan, Chaofan Li, Defu Lian, Zheng Liu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究推理密集型文档检索中的文本嵌入问题，核心方法是提出ReMixer数据合成技术解决合成数据简单化问题，并设计Redapter自适应学习算法动态调整训练权重以捕捉复杂语义关系。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于推理密集型文档检索的文本嵌入技术，其数据合成和自适应学习算法在搜索和推荐系统中具有直接应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T14:10:26">2025-10-09 14:10:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08252v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08252v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we introduce ReasonEmbed, a novel text embedding model developed for reasoning-intensive document retrieval. Our work includes three key technical contributions. First, we propose ReMixer, a new data synthesis method that overcomes the triviality problem prevalent in previous synthetic datasets, enabling large-scale production of 82K high-quality training samples. Second, we design Redapter, a self-adaptive learning algorithm that dynamically adjusts training each sample's weight based on its reasoning intensity. This allows the model to effectively capture the complex semantic relationships between queries and documents. Third, we implement ReasonEmbed across multiple backbones of varying sizes, all of which achieve superior performance on reasoning-intensive retrieval tasks. Notably, our ReasonEmbed-Qwen3-8B model offers a record-high nDCG@10 score of 38.1 on the BRIGHT benchmark, which significantly outperforms existing text embedding models. We will fully open-source our created resources in ReasonEmbed to push forward the research advancement in this field.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08517v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>CaRT：教导LLM智能体知晓何时已掌握足够信息
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CaRT: Teaching LLM Agents to Know When They Know Enough
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Grace Liu, Yuxiao Qu, Jeff Schneider, Aarti Singh, Aviral Kumar
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究LLM代理在多轮交互任务中如何确定停止信息收集的最佳时机，核心方法是使用反事实轨迹对和言语推理来训练LLM学习终止决策的理性判断能力。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的CaRT方法直接解决LLM代理在信息收集过程中的终止决策问题，这与推荐和搜索系统中用户交互的多轮决策优化高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T17:46:39">2025-10-09 17:46:39</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08517v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08517v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Many tasks require learned models to strategically gather relevant information over multiple rounds of interaction before actually acting on a task. Strategic information gathering requires models to know not only how to effectively acquire information, but also when to stop gathering information and make a decision, in order to avoid overthinking or getting derailed when acting. In this paper, we formalize this problem and introduce Counterfactuals and Reasoning for Termination (CaRT), an approach for teaching LLMs when to stop seeking information. To appropriately learn when to terminate, CaRT fine-tunes LLMs using counterfactual pairs of trajectories, one where termination is appropriate and a minimally modified version of the same trajectory where it is not. It trains the LLM to explain the rationale for the termination decision in either case via verbal reasoning, and imbues this capability into the base LLM via fine-tuning. We instantiate CaRT in two domains: interactive medical diagnosis and math problem solving. In both domains, we find that CaRT improves the efficiency of information gathering and task success rate compared to other fine-tuning methods.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Header row: Chinese title linking to the alphaXiv page, plus the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08372v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>关于表示选择与上下文学习之间关系的研究
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            On the Relationship Between the Choice of Representation and In-Context Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ioana Marinescu, Kyunghyun Cho, Eric Karl Oermann
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究ICL中演示表示与学习能力之间的关系问题，核心发现是表示质量决定ICL基线性能而学习能力独立存在，两者具有正交性。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文深入研究了ICL中表示与学习能力的关系，这对理解LLM在推荐搜索中的上下文学习机制至关重要。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- No UTC offset is available here, so datetime is a local date-time string. -->
            <time datetime="2025-10-09T15:55:28">2025-10-09 15:55:28</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08372v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08372v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In-context learning (ICL) is the ability of a large language model (LLM) to learn a new task from a few demonstrations presented as part of the context. Past studies have attributed a large portion of the success of ICL to the way these in-context demonstrations are represented, particularly to how labels are represented in classification tasks. On the other hand, observations of the learning capacity of ICL (i.e., the extent to which more in-context demonstrations can lead to higher performance) have been mixed, and ICL is often thought to occur only under specific conditions. The interaction between these two aspects in ICL, representation and learning, has not been studied in depth until now. We hypothesize that they are largely independent of one another, such that the representation of demonstrations determines the baseline accuracy of ICL, while learning from additional demonstrations improves only on top of this baseline. We validate this hypothesis by developing an optimization algorithm that can enumerate a spectrum of possible label sets (representations) varying in semantic relevance. We then perform ICL with varying numbers of in-context demonstrations for each of these label sets. We observed that learning happens regardless of the quality of the label set itself, although its efficiency, measured by the slope of improvement over in-context demonstrations, is conditioned on both the label set quality and the parameter count of the underlying language model. Despite the emergence of learning, the relative quality (accuracy) of the choice of a label set (representation) is largely maintained throughout learning, confirming our hypothesis and implying their orthogonality. Our work reveals a previously underexplored aspect of ICL: the independent effects of learning from demonstrations and their representations on ICL performance.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.08276v1 (score badge 8/10).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08276v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>超越轮次限制：使用动态上下文窗口训练深度搜索智能体
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Turn Limits: Training Deep Search Agents with Dynamic Context Window
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qiaoyu Tang, Hao Xiang, Le Yu, Bowen Yu, Yaojie Lu, Xianpei Han, Le Sun, WenJuan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究多轮交互智能体中长序列推理能力受限的问题，核心方法是引入动态上下文管理策略，通过滑动窗口机制处理持续扩展的长序列上下文，无需依赖外部摘要模型。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出动态上下文窗口管理方法解决多轮交互中的长序列处理问题，直接关联推荐系统和搜索领域的序列建模与上下文管理需求。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:31:39">2025-10-09 14:31:39</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08276v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08276v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                While recent advances in reasoning models have demonstrated cognitive behaviors through reinforcement learning, existing approaches struggle to invoke deep reasoning capabilities in multi-turn agents with long-horizon interactions. We propose DeepMiner, a novel framework that elicits such abilities by introducing high-difficulty training tasks and dynamic context window. DeepMiner presents a reverse construction method to generate complex but verifiable question-answer pairs from authentic web sources, which ensures the challenge and reliability of training data while injecting cognitive capabilities into multi-turn reasoning scenarios. We further design an elegant yet effective dynamic context management strategy for both training and inference, utilizing sliding window mechanisms while eliminating the dependency on external summarization models, thereby efficiently empowering the model to handle continuously expanding long-horizon contexts. Through reinforcement learning on Qwen3-32B, we develop DeepMiner-32B, which achieves substantial performance improvements across multiple search agent benchmarks. DeepMiner attains 33.5% accuracy on BrowseComp-en, surpassing the previous best open-source agent by almost 20 percentage points, and demonstrates consistent improvements on BrowseComp-zh, XBench-DeepSearch, and GAIA. Notably, our dynamic context management enables sustained interactions of nearly 100 turns within standard 32k context length, effectively addressing the context limitations that constrain existing multi-turn interaction systems.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.08152v1 (score badge 8/10).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08152v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>DACIP-RC：基于商业对话阅读理解任务的领域自适应持续指令预训练
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DACIP-RC: Domain Adaptive Continual Instruction Pre-Training via Reading Comprehension on Business Conversations
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Elena Khasanova, Harsh Saini, Md Tahmid Rahman Laskar, Xue-Yong Fu, Cheng Chen, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何提升小型LLM在商业对话任务中的领域适应能力，核心方法是采用基于阅读理解的持续指令预训练技术，通过对话转录生成多样化任务指令和响应来增强指令泛化能力。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出通过阅读理解实现领域自适应持续指令预训练，直接针对LLM在商业对话任务中的领域适应问题，与直接LLM应用和领域适应高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T12:35:20">2025-10-09 12:35:20</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08152v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08152v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid advancements in Large Language Models (LLMs) have enabled their adoption in real-world industrial scenarios for various natural language processing tasks. However, the high inference cost of large-scale LLMs makes their deployment impractical, necessitating the use of smaller models. Despite their efficiency, smaller LLMs lack robust zero-shot instruction-following capabilities across diverse domains, limiting their adaptability to dynamic user requirements. Traditional fine-tuning approaches exacerbate this issue by inducing catastrophic forgetting, reducing the model's generalization ability for unseen tasks. In this paper, we propose Domain Adaptive Continual Instruction Pre-Training via Reading Comprehension (DACIP-RC), a continual pre-training technique that enhances smaller LLMs' domain adaptability for business conversational tasks. Unlike conventional pre-training approaches that rely on next-token prediction, DACIP-RC generates diverse task instructions and responses via reading comprehension on conversation transcripts, enabling better instruction generalization. Our empirical evaluations demonstrate that DACIP-RC significantly improves zero-shot generalization across a wide range of business conversational tasks, including meeting summarization, action item generation, and call purpose identification. To the best of our knowledge, this is the first work to apply instruction pre-training on business conversational data, providing insights into how industries can leverage proprietary datasets for domain adaptation.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.07841v1 (score badge 8/10).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07841v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>测试时自改进的大语言模型智能体
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Self-Improving LLM Agents at Test-Time
        </div>

        <!-- Author list as emitted by the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Emre Can Acikgoz, Cheng Qian, Heng Ji, Dilek Hakkani-Tür, Gokhan Tur
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何解决传统大模型微调依赖海量训练数据且效率低下的问题，核心思想是让模型在测试时通过识别自身困难样本、生成类似示例并进行在线微调来实现自我改进。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的测试时自改进方法直接应用于LLM智能体，属于LLM直接应用和使能技术范畴，对推荐和搜索系统的在线学习有重要参考价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T06:37:35">2025-10-09 06:37:35</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07841v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07841v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                One paradigm of language model (LM) fine-tuning relies on creating large training datasets, under the assumption that high quantity and diversity will enable models to generalize to novel tasks after post-training. In practice, gathering large sets of data is inefficient, and training on them is prohibitively expensive; worse, there is no guarantee that the resulting model will handle complex scenarios or generalize better. Moreover, existing techniques rarely assess whether a training sample provides novel information or is redundant with the knowledge already acquired by the model, resulting in unnecessary costs. In this work, we explore a new test-time self-improvement method to create more effective and generalizable agentic LMs on-the-fly. The proposed algorithm can be summarized in three steps: (i) first it identifies the samples that model struggles with (self-awareness), (ii) then generates similar examples from detected uncertain samples (self-data augmentation), and (iii) uses these newly generated samples at test-time fine-tuning (self-improvement). We study two variants of this approach: Test-Time Self-Improvement (TT-SI), where the same model generates additional training examples from its own uncertain cases and then learns from them, and contrast this approach with Test-Time Distillation (TT-D), where a stronger model generates similar examples for uncertain cases, enabling student to adapt using distilled supervision. Empirical evaluations across different agent benchmarks demonstrate that TT-SI improves the performance with +5.48% absolute accuracy gain on average across all benchmarks and surpasses other standard learning methods, yet using 68x less training samples. Our findings highlight the promise of TT-SI, demonstrating the potential of self-improvement algorithms at test-time as a new paradigm for building more capable agents toward self-evolution.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.08102v1 (score badge 7/10).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08102v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>自回归语言模型的无损词汇表缩减
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Lossless Vocabulary Reduction for Auto-Regressive Language Models
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Daiki Chijiwa, Taku Hasegawa, Kyosuke Nishida, Shin'ya Yamaguchi, Tomoya Ohba, T...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究自回归语言模型因不同词汇表导致的模型协作困难问题，核心思想是通过理论框架将模型无损转换为任意小词汇表，实现不同分词模型的高效协同。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出无损词汇缩减框架，直接提升自回归语言模型的效率与互操作性，对LLM核心技术进步有重要价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:38:48">2025-10-09 11:38:48</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08102v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08102v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span><span class="category-tag">stat.ML</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Tokenization -- the process of decomposing a given text into a sequence of subwords called tokens -- is one of the key components in the development of language models. Particularly, auto-regressive language models generate texts token by token, i.e., by predicting the next-token distribution given the previous ones, and thus tokenization directly affects their efficiency in text generation. Since each language model has their own vocabulary as a set of possible tokens, they struggle to cooperate with each other at the level of next-token distributions such as model ensemble. In this paper, we establish a theoretical framework of lossless vocabulary reduction, which efficiently converts a given auto-regressive language model into the one with an arbitrarily small vocabulary without any loss in accuracy. As an application, we demonstrate that language models with different tokenization can cooperate with each other efficiently through their maximal common vocabulary.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.07796v1 (score badge 6/10).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07796v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>HySim-LLM：面向领域自适应大语言模型的嵌入加权微调边界与流形去噪
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            HySim-LLM: Embedding-Weighted Fine-Tuning Bounds and Manifold Denoising for Domain-Adapted LLMs
        </div>

        <!-- Author list as emitted by the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Majid Jaberi-Douraki, Hossein Sholehrasa, Xuan Xu, Remya Ampadi Ramachandran
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究领域适应中LLM处理异构结构化数据的鲁棒性问题，核心思想是通过嵌入加权微调边界和流形去噪理论来提升模型在领域迁移中的泛化能力和可解释性。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了嵌入加权微调和流形去噪的理论框架，虽然应用于生物医学领域，但其核心方法对推荐系统中处理异构数据和领域适应具有直接参考价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T05:16:46">2025-10-09 05:16:46</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07796v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07796v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The extraction and standardization of pharmacokinetic (PK) information from scientific literature remain significant challenges in computational pharmacology, which limits the reliability of data-driven models in drug development. Large language models (LLMs) have achieved remarkable progress in text understanding and reasoning, yet their adaptation to structured biomedical data, such as PK tables, remains constrained by heterogeneity, noise, and domain shift. To address these limitations, we propose HySim-LLM, a unified mathematical and computational framework that integrates embedding-weighted fine-tuning and manifold-aware denoising to enhance the robustness and interpretability of LLMs. We establish two theoretical results: (1) a similarity-weighted generalization bound that quantifies adaptation performance under embedding divergence, and (2) a manifold-based denoising guarantee that bounds loss contributions from noisy or off-manifold samples. These theorems provide a principled foundation for fine-tuning LLMs in structured biomedical settings. The framework offers a mathematically grounded pathway toward reliable and interpretable LLM adaptation for biomedical and data-intensive scientific domains.
            </div>
        </details>
    </div>
</div>
<!--
  Expanded paper card for arXiv:2510.08189v1 (score badge 4/10, blue variant).
  Icon-font <i> elements are decorative, so they carry aria-hidden="true";
  English-language parts are marked lang="en" because the page is zh-CN.
-->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08189v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>R-Horizon：您的大型推理模型在广度和深度上究竟能走多远？
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            R-Horizon: How Far Can Your Large Reasoning Model Really Go in Breadth and Depth?
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi Lu, Jianing Wang, Linsen Guo, Wei He, Hongyin Tang, Tao Gui, Xuanjing Huang, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何评估大型推理模型在复杂长序列任务中的真实能力。核心方法是提出R-HORIZON评估框架，通过查询组合构建长视野推理基准来激发模型的深度推理行为。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注推理模型的评估方法改进，与推荐系统、搜索广告的直接应用关联较弱，但在长序列建模和复杂推理方面有一定启发价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:16:22">2025-10-09 13:16:22</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08189v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08189v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent trends in test-time scaling for reasoning models (e.g., OpenAI o1, DeepSeek-R1) have led to remarkable improvements through long Chain-of-Thought (CoT). However, existing benchmarks mainly focus on immediate, single-horizon tasks, failing to adequately evaluate models' ability to understand and respond to complex, long-horizon scenarios. To address this incomplete evaluation of Large Reasoning Models (LRMs), we propose R-HORIZON, a method designed to stimulate long-horizon reasoning behaviors in LRMs through query composition. Based on R-HORIZON, we construct a long-horizon reasoning benchmark, comprising complex multi-step reasoning tasks with interdependent problems that span long reasoning horizons. Through comprehensive evaluation of LRMs using the R-HORIZON benchmark, we find that even the most advanced LRMs suffer significant performance degradation. Our analysis reveals that LRMs exhibit limited effective reasoning length and struggle to allocate thinking budget across multiple problems appropriately. Recognizing these limitations, we use R-HORIZON to construct long-horizon reasoning data for reinforcement learning with verified rewards (RLVR). Compared to training with single-horizon data, RLVR with R-HORIZON not only substantially improves performance on the multi-horizon reasoning tasks, but also promotes accuracy on standard reasoning tasks, with an increase of 7.5 on AIME2024. These results position R-HORIZON as a scalable, controllable, and low-cost paradigm for enhancing and evaluating the long-horizon reasoning capabilities of LRMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Simple paper card for arXiv:2510.07812v1 (score badge 8/10).
  Its .paper-details block is hidden by the .collapsed-level-1 rule in the
  page's inline styles. Icon-font <i> elements are decorative and carry
  aria-hidden="true"; English-language parts are marked lang="en".
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07812v1" target="_blank" rel="noopener noreferrer">
                基于跨语言语义压缩的多语言生成式检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Multilingual Generative Retrieval via Cross-lingual Semantic Compression
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuxin Huang, Simeng Wu, Ran Song, Yan Xiang, Yantuan Xian, Shengxiang Gao, Zheng...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及生成式检索技术，这是搜索系统中的核心进展。跨语言语义压缩技术可以显著提升多语言搜索系统的效率和效果，直接应用于多语言搜索场景。这种压缩方法也有潜力应用于推荐系统中的多语言内容理解和用户兴趣建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T05:42:57">2025-10-09 05:42:57</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07812v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07812v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Generative Information Retrieval is an emerging retrieval paradigm that exhibits remarkable performance in monolingual scenarios. However, applying these methods to multilingual retrieval still encounters two primary challenges, cross-lingual identifier misalignment and identifier inflation. To address these limitations, we propose Multilingual Generative Retrieval via Cross-lingual Semantic Compression (MGR-CSC), a novel framework that unifies semantically equivalent multilingual keywords into shared atoms to align semantics and compresses the identifier space, and we propose a dynamic multi-step constrained decoding strategy during retrieval. MGR-CSC improves cross-lingual alignment by assigning consistent identifiers and enhances decoding efficiency by reducing redundancy. Experiments demonstrate that MGR-CSC achieves outstanding retrieval accuracy, improving by 6.83% on mMarco100k and 4.77% on mNQ320k, while reducing document identifiers length by 74.51% and 78.2%, respectively.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Simple paper card for arXiv:2510.07799v1 (score badge 8/10).
  Its .paper-details block is hidden by the .collapsed-level-1 rule in the
  page's inline styles. Icon-font <i> elements are decorative and carry
  aria-hidden="true"; English-language parts are marked lang="en".
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07799v1" target="_blank" rel="noopener noreferrer">
                基于图扩散模型动态生成多LLM智能体通信拓扑
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title of the paper. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Dynamic Generation of Multi-LLM Agents Communication Topologies with Graph Diffusion Models
        </div>

        <!-- Author list as emitted by the generator (may end in "..."). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Eric Hanchen Jiang, Guancheng Wan, Sophia Yin, Mengting Li, Yuchen Wu, Xiao Lian...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多LLM智能体系统，这属于'直接LLM应用'范畴，在推荐系统和搜索中可用于构建协同推理框架。图扩散模型用于动态优化通信拓扑，这种技术可应用于多智能体推荐系统中协调不同LLM专家的交互模式，提升整体系统性能。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T05:28:28">2025-10-09 05:28:28</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07799v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07799v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The source abstract contained a raw LaTeX \textit{...}; it is rendered as <em> here. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The efficiency of multi-agent systems driven by large language models (LLMs) largely hinges on their communication topology. However, designing an optimal topology is a non-trivial challenge, as it requires balancing competing objectives such as task performance, communication cost, and robustness. Existing frameworks often rely on static or hand-crafted topologies, which inherently fail to adapt to diverse task requirements, leading to either excessive token consumption for simple problems or performance bottlenecks for complex ones. To address this challenge, we introduce a novel generative framework called <em>Guided Topology Diffusion (GTD)</em>. Inspired by conditional discrete graph diffusion models, GTD formulates topology synthesis as an iterative construction process. At each step, the generation is steered by a lightweight proxy model that predicts multi-objective rewards (e.g., accuracy, utility, cost), enabling real-time, gradient-free optimization towards task-adaptive topologies. This iterative, guided synthesis process distinguishes GTD from single-step generative frameworks, enabling it to better navigate complex design trade-offs. We validated GTD across multiple benchmarks, and experiments show that this framework can generate highly task-adaptive, sparse, and efficient communication topologies, significantly outperforming existing methods in LLM agent collaboration.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07794v1" target="_blank" rel="noopener noreferrer">
                HiPRAG：用于高效代理检索增强生成的分层过程奖励
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            HiPRAG: Hierarchical Process Rewards for Efficient Agentic Retrieval Augmented Generation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Peilin Wu, Mian Zhang, Kun Wan, Wentian Zhao, Kaiyu He, Xinya Du, Zhiyu Chen
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及检索增强生成(RAG)的代理方法，这直接适用于搜索系统，其中代理可以主动检索和整合信息以响应用户查询。分层过程奖励机制通过优化检索和生成过程，可以显著提升搜索和推荐系统中的内容质量和效率，减少不相关信息的检索。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 05:13:10
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07794v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07794v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Agentic RAG is a powerful technique for incorporating external information that LLMs lack, enabling better problem solving and question answering. However, suboptimal search behaviors exist widely, such as over-search (retrieving information already known) and under-search (failing to search when necessary), which leads to unnecessary overhead and unreliable outputs. Current training methods, which typically rely on outcome-based rewards in a RL framework, lack the fine-grained control needed to address these inefficiencies. To overcome this, we introduce Hierarchical Process Rewards for Efficient agentic RAG (HiPRAG), a training methodology that incorporates a fine-grained, knowledge-grounded process reward into the RL training. Our approach evaluates the necessity of each search decision on-the-fly by decomposing the agent's reasoning trajectory into discrete, parsable steps. We then apply a hierarchical reward function that provides an additional bonus based on the proportion of optimal search and non-search steps, on top of commonly used outcome and format rewards. Experiments on the Qwen2.5 and Llama-3.2 models across seven diverse QA benchmarks show that our method achieves average accuracies of 65.4% (3B) and 67.2% (7B). This is accomplished while improving search efficiency, reducing the over-search rate to just 2.3% and concurrently lowering the under-search rate. These results demonstrate the efficacy of optimizing the reasoning process itself, not just the final outcome. Further experiments and analysis demonstrate that HiPRAG shows good generalizability across a wide range of RL algorithms, model families, sizes, and types. This work demonstrates the importance and potential of fine-grained control through RL, for improving the efficiency and optimality of reasoning for search agents.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07768v1" target="_blank" rel="noopener noreferrer">
                ToolLibGen：面向大语言模型推理的可扩展自动工具创建与聚合
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            ToolLibGen: Scalable Automatic Tool Creation and Aggregation for LLM Reasoning
        </div>

        <!-- Authors (the list ends in "..." as emitted by the generator) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Murong Yue, Zhiwei Liu, Liangwei Yang, Jianguo Zhang, Zuxin Liu, Haolin Chen, Zi...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文属于Enabling LLM Tech范畴，专注于提升LLM的工具使用和推理能力。在搜索和推荐系统中，这种自动工具创建技术可以用于构建更智能的查询理解、多模态信息检索和复杂用户意图推理模块，显著提升系统处理复杂任务的能力。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 04:11:16
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07768v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07768v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large Language Models (LLMs) equipped with external tools have demonstrated enhanced performance on complex reasoning tasks. The widespread adoption of this tool-augmented reasoning is hindered by the scarcity of domain-specific tools. For instance, in domains such as physics question answering, suitable and specialized tools are often missing. Recent work has explored automating tool creation by extracting reusable functions from Chain-of-Thought (CoT) reasoning traces; however, these approaches face a critical scalability bottleneck. As the number of generated tools grows, storing them in an unstructured collection leads to significant retrieval challenges, including an expanding search space and ambiguity between function-related tools. To address this, we propose a systematic approach to automatically refactor an unstructured collection of tools into a structured tool library. Our system first generates discrete, task-specific tools and clusters them into semantically coherent topics. Within each cluster, we introduce a multi-agent framework to consolidate scattered functionalities: a code agent refactors code to extract shared logic and creates versatile, aggregated tools, while a reviewing agent ensures that these aggregated tools maintain the complete functional capabilities of the original set. This process transforms numerous question-specific tools into a smaller set of powerful, aggregated tools without loss of functionality. Experimental results demonstrate that our approach significantly improves tool retrieval accuracy and overall reasoning performance across multiple reasoning tasks. Furthermore, our method shows enhanced scalability compared with baselines as the number of question-specific increases.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08564v1" target="_blank" rel="noopener noreferrer">
                如何教授大型多模态模型新技能
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            How to Teach Large Multimodal Models New Skills
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhen Zhu, Yiming Gong, Yao Xiao, Yaoyao Liu, Derek Hoiem
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及大型多模态模型的新技能学习，属于'Enabling LLM Tech'范畴，对推荐系统和搜索有直接应用价值。在推荐系统中，可以用于快速适应新的用户行为模式或商品特征；在搜索中，能够帮助模型快速学习新的查询意图或文档类型，提升系统适应性。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 17:59:37
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08564v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08564v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure.
             The ampersand in "Gate&Up" is written as an entity so it cannot be read as a character reference. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                How can we teach large multimodal models (LMMs) new skills without erasing prior abilities? We study sequential fine-tuning on five target skills while monitoring general ability on eight held-out benchmarks across three model families. We observe that apparent "forgetting" on held-out tasks after narrow fine-tuning can partly recover at later stages. We trace this behavior to a measurable shift in the output token distribution, manifested through a simple counting-bias probe that co-varies with forgetting. Guided by this picture, we identify two simple, robust tuning recipes that learn strongly while limiting drift: (i) updating only the self-attention projection layers, and (ii) updating only the MLP Gate&amp;Up while freezing the Down projection. Across models and tasks, these choices deliver strong target gains while largely preserving held-out performance. Code is available at https://github.com/jessemelpolio/LMM_CL
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08565v1" target="_blank" rel="noopener noreferrer">
                NaViL：在数据约束下重新思考原生多模态大语言模型的缩放特性
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            NaViL: Rethinking Scaling Properties of Native Multimodal Large Language Models under Data Constraints
        </div>

        <!-- Authors (the list ends in "..." as emitted by the generator) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Changyao Tian, Hao Li, Gen Luo, Xizhou Zhu, Weijie Su, Hanming Deng, Jinguo Zhu,...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究多模态LLM在数据受限情况下的缩放特性，直接属于'Enabling LLM Tech'范畴。多模态LLM的缩放特性和数据效率研究对于RecSys/Ads至关重要，因为推荐和广告系统经常面临多模态数据（文本、图像、视频）且数据分布不均衡的问题，这些发现可以帮助构建更高效的多模态推荐模型。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 17:59:37
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08565v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08565v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Compositional training has been the de-facto paradigm in existing Multimodal Large Language Models (MLLMs), where pre-trained vision encoders are connected with pre-trained LLMs through continuous multimodal pre-training. However, the multimodal scaling property of this paradigm remains difficult to explore due to the separated training. In this paper, we focus on the native training of MLLMs in an end-to-end manner and systematically study its design space and scaling property under a practical setting, i.e., data constraint. Through careful study of various choices in MLLM, we obtain the optimal meta-architecture that best balances performance and training cost. After that, we further explore the scaling properties of the native MLLM and indicate the positively correlated scaling relationship between visual encoders and LLMs. Based on these findings, we propose a native MLLM called NaViL, combined with a simple and cost-effective recipe. Experimental results on 14 multimodal benchmarks confirm the competitive performance of NaViL against existing MLLMs. Besides that, our findings and results provide in-depth insights for the future study of native MLLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08540v1" target="_blank" rel="noopener noreferrer">
                MM-HELIX：通过整体平台与自适应混合策略优化增强多模态长链反思推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            MM-HELIX: Boosting Multimodal Long-Chain Reflective Reasoning with Holistic Platform and Adaptive Hybrid Policy Optimization
        </div>

        <!-- Authors (the list ends in "..." as emitted by the generator) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiangyu Zhao, Junming Lin, Tianhao Liang, Yifan Zhou, Wenhao Chai, Yuzhe Gu, Wei...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态长链推理技术，属于核心LLM进展，具有在搜索和推荐系统中应用的明确潜力。增强的反思推理能力可显著提升复杂用户查询理解、多轮对话推荐以及跨模态内容理解，直接服务于搜索和推荐系统的核心需求。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 17:53:58
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08540v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08540v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure.
             Percent signs are plain "%": the LaTeX backslash escape would be shown literally in HTML. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                While current Multimodal Large Language Models (MLLMs) have demonstrated proficiency in reasoning tasks such as mathematics and logic, their capacity for long-chain reflective reasoning, a prerequisite for solving complex real-world problems, remains largely underexplored. In this work, we first conduct an extensive empirical investigation to evaluate this capability. Leveraging a carefully designed data synthesis engine, we construct MM-HELIX, a multimodal benchmark consisting 1,260 samples of 42 challenging synthetic tasks that require iterative thinking and backtracking. Empirical results on this benchmark reveal that existing MLLMs exhibit significant performance deficits in long-chain reflective reasoning. To address this limitation, we generate post-training data and further explore learning paradigms for exploiting such data. We first develop the Step-Elicited Response Generation pipeline to create MM-HELIX-100K, a large-scale dataset of 100k high-quality, reflective reasoning traces for instruction-tuning stage. Given that standard Reinforcement Learning fails on complex tasks due to sparse reward signals and catastrophic forgetting after Supervised Fine-Tuning, we propose Adaptive Hybrid Policy Optimization (AHPO), a novel training strategy that dynamically unifies offline supervision and online optimization into a single stage. This strategy enables the model to learn from expert data when rewards are sparse and conduct independent exploration once proficient. When applied to the Qwen2.5-VL-7B baseline, our method achieves a +18.6% accuracy improvement on MM-HELIX benchmark and demonstrates strong generalization with a +5.7% average performance gain on general mathematic and logic tasks. Our work demonstrate that reflective reasoning in MLLMs can be effectively learned and generalized, paving the way for developing more capable MLLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08492v1" target="_blank" rel="noopener noreferrer">
                协同增效：利用未配对多模态数据构建更强大的单模态模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Better Together: Leveraging Unpaired Multimodal Data for Stronger Unimodal Models
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sharut Gupta, Shobhita Sundaram, Chenyu Wang, Stefanie Jegelka, Phillip Isola
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态数据利用和模型增强，直接关联'VLM Analogy for Heterogeneous Data'焦点，将不同数据模态视为独立输入进行统一建模。在推荐系统和搜索场景中，可应用于处理用户行为序列、上下文特征等异构数据，通过多模态协同提升单模态模型的性能表现。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 17:32:23
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08492v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08492v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Traditional multimodal learners find unified representations for tasks like visual question answering, but rely heavily on paired datasets. However, an overlooked yet potentially powerful question is: can one leverage auxiliary unpaired multimodal data to directly enhance representation learning in a target modality? We introduce UML: Unpaired Multimodal Learner, a modality-agnostic training paradigm in which a single model alternately processes inputs from different modalities while sharing parameters across them. This design exploits the assumption that different modalities are projections of a shared underlying reality, allowing the model to benefit from cross-modal structure without requiring explicit pairs. Theoretically, under linear data-generating assumptions, we show that unpaired auxiliary data can yield representations strictly more informative about the data-generating process than unimodal training. Empirically, we show that using unpaired data from auxiliary modalities -- such as text, audio, or images -- consistently improves downstream performance across diverse unimodal targets such as image and audio. Our project page: https://unpaired-multimodal.github.io/
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08109v1" target="_blank" rel="noopener noreferrer">
                VersionRAG：面向演进文档的版本感知检索增强生成
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            VersionRAG: Version-Aware Retrieval-Augmented Generation for Evolving Documents
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Daniel Huwiler, Kurt Stockinger, Jonathan Fürst
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的版本感知RAG技术属于LLM核心技术的进展，在搜索和推荐系统中具有直接应用潜力。当文档内容随时间演进时（如产品描述、新闻文章、政策法规），版本感知检索能够确保系统返回最新且相关的信息，这对于搜索结果的时效性和推荐系统的准确性至关重要。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 11:48:58
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08109v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08109v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Retrieval-Augmented Generation (RAG) systems fail when documents evolve through versioning-a ubiquitous characteristic of technical documentation. Existing approaches achieve only 58-64% accuracy on version-sensitive questions, retrieving semantically similar content without temporal validity checks. We present VersionRAG, a version-aware RAG framework that explicitly models document evolution through a hierarchical graph structure capturing version sequences, content boundaries, and changes between document states. During retrieval, VersionRAG routes queries through specialized paths based on intent classification, enabling precise version-aware filtering and change tracking. On our VersionQA benchmark-100 manually curated questions across 34 versioned technical documents-VersionRAG achieves 90% accuracy, outperforming naive RAG (58%) and GraphRAG (64%). VersionRAG reaches 60% accuracy on implicit change detection where baselines fail (0-10%), demonstrating its ability to track undocumented modifications. Additionally, VersionRAG requires 97% fewer tokens during indexing than GraphRAG, making it practical for large-scale deployment. Our work establishes versioned document QA as a distinct task and provides both a solution and benchmark for future research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card. While the card has the collapsed-level-1 class, the page's inline
         stylesheet hides .paper-details, so only this title/score row is shown.
         Icon-font glyphs and "|" separators are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07720v1" target="_blank" rel="noopener noreferrer">
                查询并非孤立：基于文本嵌入聚类的视频搜索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Queries Are Not Alone: Clustering Text Embeddings for Video Search
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Peyang Liu, Xi Wang, Ziqiang Cui, Wei Ye
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于搜索领域，通过文本嵌入聚类技术改进视频搜索效果，这直接属于核心搜索领域的进展。虽然论文具体针对视频搜索，但其基于文本嵌入和聚类的方法可以推广到通用的搜索和推荐系统中，用于处理用户查询的多样性和语义理解。</p>
        </div>

        <!-- Date, arXiv link and category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 02:56:18
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07720v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07720v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract, collapsed by default through the native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                The rapid proliferation of video content across various platforms has highlighted the urgent need for advanced video retrieval systems. Traditional methods, which primarily depend on directly matching textual queries with video metadata, often fail to bridge the semantic gap between text descriptions and the multifaceted nature of video content. This paper introduces a novel framework, the Video-Text Cluster (VTC), which enhances video retrieval by clustering text queries to capture a broader semantic scope. We propose a unique clustering mechanism that groups related queries, enabling our system to consider multiple interpretations and nuances of each query. This clustering is further refined by our innovative Sweeper module, which identifies and mitigates noise within these clusters. Additionally, we introduce the Video-Text Cluster-Attention (VTC-Att) mechanism, which dynamically adjusts focus within the clusters based on the video content, ensuring that the retrieval process emphasizes the most relevant textual features. Further experiments have demonstrated that our proposed model surpasses existing state-of-the-art models on five public datasets.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08470v1" target="_blank" rel="noopener noreferrer">
                观察学习：面向低资源视觉语言建模的令牌级动态门控机制
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Looking to Learn: Token-wise Dynamic Gating for Low-Resource Vision-Language Modelling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Bianca-Mihaela Ganescu, Suchir Salhan, Andrew Caines, Paula Buttery
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的令牌级动态门控机制属于Transformer架构效率优化范畴，是Enabling Transformer Tech的典型代表。这种动态计算机制可应用于推荐系统中处理用户行为序列和上下文特征，通过智能分配计算资源来提升长序列建模效率，同时其低资源特性对大规模工业推荐系统具有重要价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 17:10:36">2025-10-09 17:10:36</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08470v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08470v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Training vision-language models on cognitively-plausible amounts of data requires rethinking how models integrate multimodal information. Within the constraints of the Vision track for the BabyLM Challenge 2025, we propose a lightweight decoder-based architecture with (1) token-wise dynamic gating for adaptive fusion of linguistic and visual cues, (2) feature modulation and channel attention to maximise the utility of limited visual information and (3) auxiliary contrastive objectives for visual grounding. Evaluation on five benchmarks (BLiMP, BLiMP Supplement, EWoK, Winoground and VQA) shows competitive or superior performance to multimodal baselines. More notably, our dynamic gate discovers interpretable patterns without explicit supervision, favouring visual cues for content words and linguistic cues for function words. While we identify limitations in the Challenge constraints, such as the information bottleneck created by global image embeddings and training instability from the dataset split, our findings establish dynamic gating as a powerful tool for efficient multimodal learning, offering both interpretability and performance even under severe constraints.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08457v1" target="_blank" rel="noopener noreferrer">
                ARES：基于难度感知的令牌级熵整形的多模态自适应推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ARES: Multimodal Adaptive Reasoning via Difficulty-Aware Token-Level Entropy Shaping
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuang Chen, Yue Guo, Yimeng Ye, Shijue Huang, Wenbo Hu, Haoxi Li, Manyuan Zhang...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出难度感知的令牌级熵整形技术，属于Transformer架构效率优化领域，与'使能Transformer技术'焦点相关。这种自适应推理机制可应用于推荐系统和搜索中的多模态内容理解，通过动态调整计算复杂度来提高大规模部署的效率。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 17:03:28">2025-10-09 17:03:28</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08457v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08457v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent advances in multimodal large reasoning models (MLRMs) have substantially improved their ability to solve complex textual and visual tasks. However, these models tend to overthink on simple problems, producing unnecessarily lengthy reasoning traces, while under-exploring on challenging ones, leading to missed solutions. To address this imbalance, we propose ARES, a unified open-source framework for adaptive reasoning that dynamically allocates exploration effort based on task difficulty. Our approach is motivated by two key empirical findings: (i) while single-token entropy is noisy, high window-entropy (HWE) tokens (token-level entropies averaged under a sliding window) can reliably capture reasoning-critical moments; and (ii) reducing HWE usage benefits easy problems, while increasing it is essential for solving hard ones. Building on these insights, ARES introduces a two-stage training pipeline. In the Adaptive Cold-Start stage, we curate multimodal and textual data paired with reasoning traces of length proportional to problem difficulty, equipping the model with initial difficulty awareness. In the second stage, we develop Adaptive Entropy Policy Optimization (AEPO), which uses HWE tokens as exploration triggers to decide when to explore, and a hierarchical entropy reward with dynamic KL control to decide how much to explore. Extensive experiments demonstrate that ARES achieves superior performance and reasoning efficiency across diverse mathematical, logical, and multimodal benchmarks, while closing the gap to leading commercial systems under significantly lower inference costs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07782v1" target="_blank" rel="noopener noreferrer">
                RCPU：大语言模型结构化剪枝的旋转约束误差补偿
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RCPU: Rotation-Constrained Error Compensation for Structured Pruning of a Large Language Model
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuichiro Haruta, Kazunori Matsumoto, Zhi Li, Yanan Wang, Mori Kurokawa
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及大语言模型的高效压缩技术，属于'使能LLM技术'范畴。结构化剪枝技术可以显著减少LLM的计算和内存需求，这对于在推荐系统、搜索和广告中部署大型模型至关重要，能够实现更快的推理速度和更低的部署成本。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 04:54:09">2025-10-09 04:54:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07782v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07782v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we propose a rotation-constrained compensation method to address the errors introduced by structured pruning of large language models (LLMs). LLMs are trained on massive datasets and accumulate rich semantic knowledge in their representation space. In contrast, pruning is typically carried out with only a small amount of calibration data, which makes output mismatches unavoidable. Although direct least-squares fitting can reduce such errors, it tends to overfit to the limited calibration set, destructively modifying pretrained weights. To overcome this difficulty, we update the pruned parameters under a rotation constraint. This constrained update preserves the geometry of output representations (i.e., norms and inner products) and simultaneously re-aligns the pruned subspace with the original outputs. Furthermore, in rotation-constrained compensation, removing components that strongly contribute to the principal directions of the output makes error recovery difficult. Since input dimensions with large variance strongly affect these principal directions, we design a variance-aware importance score that ensures such dimensions are preferentially kept in the pruned model. By combining this scoring rule with rotation-constrained updates, the proposed method effectively compensates errors while retaining the components likely to be more important in a geometry-preserving manner. In the experiments, we apply the proposed method to LLaMA-7B and evaluate it on WikiText-2 and multiple language understanding benchmarks. The results demonstrate consistently better perplexity and task accuracy compared with existing baselines.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07777v1" target="_blank" rel="noopener noreferrer">
                不再漂移？多轮大语言模型交互中的上下文均衡
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Drift No More? Context Equilibria in Multi-Turn LLM Interactions
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Vardhan Dongre, Ryan A. Rossi, Viet Dac Lai, David Seunghyun Yoon, Dilek Hakkani...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究多轮LLM交互中的上下文均衡问题，直接关联到LLM在推荐系统和搜索中的实际应用场景。在多轮对话推荐和搜索交互中，上下文漂移是影响用户体验的关键技术挑战，该研究可为构建更稳定的多轮推荐/搜索系统提供技术基础。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 04:48:49">2025-10-09 04:48:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07777v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07777v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <!-- The benchmark name uses a literal Greek tau so it reads correctly without a math renderer. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) excel at single-turn tasks such as instruction following and summarization, yet real-world deployments require sustained multi-turn interactions where user goals and conversational context persist and evolve. A recurring challenge in this setting is context drift: the gradual divergence of a model's outputs from goal-consistent behavior across turns. Unlike single-turn errors, drift unfolds temporally and is poorly captured by static evaluation metrics. In this work, we present a study of context drift in multi-turn interactions and propose a simple dynamical framework to interpret its behavior. We formalize drift as the turn-wise KL divergence between the token-level predictive distributions of the test model and a goal-consistent reference model, and propose a recurrence model that interprets its evolution as a bounded stochastic process with restoring forces and controllable interventions. We instantiate this framework in both synthetic long-horizon rewriting tasks and realistic user-agent simulations such as in τ-Bench, measuring drift for several open-weight LLMs that are used as user simulators. Our experiments consistently reveal stable, noise-limited equilibria rather than runaway degradation, and demonstrate that simple reminder interventions reliably reduce divergence in line with theoretical predictions. Together, these results suggest that multi-turn drift can be understood as a controllable equilibrium phenomenon rather than as inevitable decay, providing a foundation for studying and mitigating context drift in extended interactions.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07745v1" target="_blank" rel="noopener noreferrer">
                潜在推理模型的并行测试时缩放
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Parallel Test-Time Scaling for Latent Reasoning Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Runyang You, Yongqi Li, Meng Liu, Wenjie Wang, Liqiang Nie, Wenjie Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及测试时缩放技术，这属于'使能LLM技术'范畴，通过优化推理阶段的模型扩展来提高效率。在推荐系统和搜索领域，这种技术可以显著降低大规模模型部署的延迟和计算成本，特别是在处理复杂用户序列和上下文特征时实现更高效的实时推理。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 03:33:00">2025-10-09 03:33:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07745v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07745v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Parallel test-time scaling (TTS) is a pivotal approach for enhancing large language models (LLMs), typically by sampling multiple token-based chains-of-thought in parallel and aggregating outcomes through voting or search. Recent advances in latent reasoning, where intermediate reasoning unfolds in continuous vector spaces, offer a more efficient alternative to explicit Chain-of-Thought, yet whether such latent models can similarly benefit from parallel TTS remains open, mainly due to the absence of sampling mechanisms in continuous space, and the lack of probabilistic signals for advanced trajectory aggregation. This work enables parallel TTS for latent reasoning models by addressing the above issues. For sampling, we introduce two uncertainty-inspired stochastic strategies: Monte Carlo Dropout and Additive Gaussian Noise. For aggregation, we design a Latent Reward Model (LatentRM) trained with step-wise contrastive objective to score and guide latent reasoning. Extensive experiments and visualization analyses show that both sampling strategies scale effectively with compute and exhibit distinct exploration dynamics, while LatentRM enables effective trajectory selection. Together, our explorations open a new direction for scalable inference in continuous spaces. Code released at https://github.com/YRYangang/LatentTTS.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07884v1" target="_blank" rel="noopener noreferrer">
                对比式弱到强泛化
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Contrastive Weak-to-strong Generalization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Houcheng Jiang, Junfeng Fang, Jiaxin Wu, Tianyu Zhang, Chen Gao, Yong Li, Xiang ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注对比学习和弱到强泛化，这是LLM训练和知识蒸馏中的核心技术进步。在推荐系统和搜索领域，这种技术可以用于从大型教师模型向更高效的学生模型进行知识迁移，或者处理用户反馈中的弱监督信号，从而提高模型泛化能力和部署效率。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 07:37:23">2025-10-09 07:37:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07884v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07884v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Weak-to-strong generalization provides a promising paradigm for scaling large language models (LLMs) by training stronger models on samples from aligned weaker ones, without requiring human feedback or explicit reward modeling. However, its robustness and generalization are hindered by the noise and biases in weak-model outputs, which limit its applicability in practice. To address this challenge, we leverage implicit rewards, which approximate explicit rewards through log-likelihood ratios, and reveal their structural equivalence with Contrastive Decoding (CD), a decoding strategy shown to reduce noise in LLM generation. Building on this connection, we propose Contrastive Weak-to-Strong Generalization (ConG), a framework that employs contrastive decoding between pre- and post-alignment weak models to generate higher-quality samples. This approach enables more reliable capability transfer, denoising, and improved robustness, substantially mitigating the limitations of traditional weak-to-strong methods. Empirical results across different model families confirm consistent improvements, demonstrating the generality and effectiveness of ConG. Taken together, our findings highlight the potential of ConG to advance weak-to-strong generalization and provide a promising pathway toward AGI.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and the n/10 score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08567v1" target="_blank" rel="noopener noreferrer">
                MATRIX：面向鲁棒工具使用推理的多模态智能体调优
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <!-- Card body: hidden while the card carries collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN for assistive technology. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MATRIX: Multimodal Agent Tuning for Robust Tool-Use Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tajamul Ashraf, Umair Nawaz, Abdelrahman M. Shaker, Rao Anwer, Philip Torr, Faha...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态智能体和工具使用推理，属于LLM应用范畴。虽然多模态智能体在搜索和推荐系统中具有潜在应用价值（如处理异构数据和外部工具集成），但论文标题未明确表明与推荐系统、搜索或广告的直接关联，且工具使用推理可能更偏向通用AI助手应用而非核心排序任务。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 17:59:54">2025-10-09 17:59:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08567v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08567v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract; closed by default through the native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision language models (VLMs) are increasingly deployed as controllers with access to external tools for complex reasoning and decision-making, yet their effectiveness remains limited by the scarcity of high-quality multimodal trajectories and the cost of manual annotation. We address this challenge with a vision-centric agent tuning framework that automatically synthesizes multimodal trajectories, generates step-wise preference pairs, and trains a VLM controller for robust tool-use reasoning. Our pipeline first constructs M-TRACE, a large-scale dataset of 28.5K multimodal tasks with 177K verified trajectories, enabling imitation-based trajectory tuning. Building on this, we develop MATRIX Agent, a controller finetuned on M-TRACE for step-wise tool reasoning. To achieve finer alignment, we further introduce Pref-X, a set of 11K automatically generated preference pairs, and optimize MATRIX on it via step-wise preference learning. Across three benchmarks, Agent-X, GTA, and GAIA, MATRIX consistently surpasses both open- and closed-source VLMs, demonstrating scalable and effective multimodal tool use. Our data and code is avaliable at https://github.com/mbzuai-oryx/MATRIX.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08245v1" target="_blank" rel="noopener noreferrer">
                低资源语言建模中用于合成数据生成的对比解码方法
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Contrastive Decoding for Synthetic Data Generation in Low-Resource Language Modeling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jannek Ulm, Kevin Du, Vésteinn Snæbjarnarson
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及合成数据生成和对比解码技术，这些属于LLM核心技术进展，可能应用于推荐系统或搜索中的冷启动问题或数据增强。然而，论文明确聚焦于低资源语言建模这一特定领域，与主流RecSys/Search/Ads应用场景的直接关联性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:04:52">2025-10-09 14:04:52</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08245v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08245v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) are trained on huge amounts of textual data, and concerns have been raised that the limits of such data may soon be reached. A potential solution is to train on synthetic data sampled from LLMs. In this work, we build on this idea and investigate the benefits of contrastive decoding for generating synthetic corpora. In a controlled setting, we experiment with sampling corpora using the relative difference between a good and bad model trained on the same original corpus of 100 million words. By amplifying the signal from a model that has better performance, we create a synthetic corpus and mix it with the original training data. Our findings show that training on a mixture of synthesized and real data improves performance on the language modeling objective and a range of downstream tasks. In particular, we see that training with a mix of synthetic data from contrastive decoding benefits tasks that require more reasoning skills, while synthetic data from traditional sampling helps more on tasks dependent on surface level linguistic capabilities.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07962v1" target="_blank" rel="noopener noreferrer">
                LightReasoner：小型语言模型能否教会大型语言模型推理？
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LightReasoner: Can Small Language Models Teach Large Language Models Reasoning?
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingyuan Wang, Yankai Chen, Zhonghang Li, Chao Huang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文探讨小型语言模型如何提升大型语言模型的推理能力，这属于核心LLM技术进展。虽然推理能力本身是通用能力，但在推荐和搜索系统中，增强的推理能力可以用于更复杂的用户意图理解、多跳推理查询处理和上下文感知的推荐逻辑。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T08:55:12">2025-10-09 08:55:12</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07962v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07962v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) have demonstrated remarkable progress in reasoning, often through supervised fine-tuning (SFT). However, SFT is resource-intensive, relying on large curated datasets, rejection-sampled demonstrations, and uniform optimization across all tokens, even though only a fraction carry meaningful learning value. In this work, we explore a counterintuitive idea: can smaller language models (SLMs) teach larger language models (LLMs) by revealing high-value reasoning moments that reflect the latter's unique strength? We propose LightReasoner, a novel framework that leverages the behavioral divergence between a stronger expert model (LLM) and a weaker amateur model (SLM). LightReasoner operates in two stages: (1) a sampling stage that pinpoints critical reasoning moments and constructs supervision examples capturing the expert's advantage through expert-amateur contrast, and (2) a fine-tuning stage that aligns the expert model with these distilled examples, amplifying its reasoning strengths. Across seven mathematical benchmarks, LightReasoner improves accuracy by up to 28.1%, while reducing time consumption by 90%, sampled problems by 80%, and tuned token usage by 99%, all without relying on ground-truth labels. By turning weaker SLMs into effective teaching signals, LightReasoner offers a scalable and resource-efficient approach for advancing LLM reasoning. Code is available at: https://github.com/HKUDS/LightReasoner
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07842v1" target="_blank" rel="noopener noreferrer">
                AdaSwitch：用于知识蒸馏的自适应切换生成
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AdaSwitch: Adaptive Switching Generation for Knowledge Distillation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingyu Peng, Maolin Wang, Hengyi Cai, Yuchen Li, Kai Zhang, Shuaiqiang Wang, Daw...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注知识蒸馏中的自适应切换生成技术，属于模型压缩和效率优化范畴。虽然知识蒸馏本身是LLM领域的重要技术，可以应用于推荐系统或搜索中的模型部署效率提升，但论文标题未明确指向RecSys/Search/Ads的具体应用场景，因此相关性中等。这种自适应切换机制有潜力应用于推荐系统中的多专家模型或搜索中的查询自适应处理。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T06:38:37">2025-10-09 06:38:37</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07842v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07842v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Small language models (SLMs) are crucial for applications with strict latency and computational constraints, yet achieving high performance remains challenging. Knowledge distillation (KD) can transfer capabilities from large teacher models, but existing methods involve trade-offs: off-policy distillation provides high-quality supervision but introduces a training-inference mismatch, while on-policy approaches maintain consistency but rely on low-quality student outputs. To address these issues, we propose AdaSwitch, a novel approach that dynamically combines on-policy and off-policy generation at the token level. AdaSwitch allows the student to first explore its own predictions and then selectively integrate teacher guidance based on real-time quality assessment. This approach simultaneously preserves consistency and maintains supervision quality. Experiments on three datasets with two teacher-student LLM pairs demonstrate that AdaSwitch consistently improves accuracy, offering a practical and effective method for distilling SLMs with acceptable additional overhead.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08179v1" target="_blank" rel="noopener noreferrer">
                双粒度Sinkhorn蒸馏用于增强长尾噪声数据学习
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Dual-granularity Sinkhorn Distillation for Enhanced Learning from Long-tailed Noisy Data
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Feng Hong, Yu Huang, Zihua Zhao, Zhihan Zhou, Jiangchao Yao, Dongsheng Li, Ya Zh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种处理长尾噪声数据的方法，这在推荐系统中处理真实世界用户行为数据时具有潜在应用价值。然而，该方法主要关注通用数据分布问题，没有明确针对推荐、搜索或广告领域的特定挑战，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:05:27">2025-10-09 13:05:27</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08179v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08179v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Real-world datasets for deep learning frequently suffer from the co-occurring challenges of class imbalance and label noise, hindering model performance. While methods exist for each issue, effectively combining them is non-trivial, as distinguishing genuine tail samples from noisy data proves difficult, often leading to conflicting optimization strategies. This paper presents a novel perspective: instead of primarily developing new complex techniques from scratch, we explore synergistically leveraging well-established, individually 'weak' auxiliary models - specialized for tackling either class imbalance or label noise but not both. This view is motivated by the insight that class imbalance (a distributional-level concern) and label noise (a sample-level concern) operate at different granularities, suggesting that robustness mechanisms for each can in principle offer complementary strengths without conflict. We propose Dual-granularity Sinkhorn Distillation (D-SINK), a novel framework that enhances dual robustness by distilling and integrating complementary insights from such 'weak', single-purpose auxiliary models. Specifically, D-SINK uses an optimal transport-optimized surrogate label allocation to align the target model's sample-level predictions with a noise-robust auxiliary and its class distributions with an imbalance-robust one. Extensive experiments on benchmark datasets demonstrate that D-SINK significantly improves robustness and achieves strong empirical performance in learning from long-tailed noisy data.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07644v1" target="_blank" rel="noopener noreferrer">
                ISMIE：一个用于表征现代信息环境中信息寻求行为的框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ISMIE: A Framework to Characterize Information Seeking in Modern Information Environments
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuoqi Sun, Danula Hettiachchi, Damiano Spina
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于信息寻求行为表征框架，虽然与搜索系统有一定关联，但更偏向信息行为学和人机交互领域，而非核心推荐系统、搜索或广告的技术进展。该框架可能为理解用户搜索意图提供基础，但缺乏明确的LLM、Transformer或推荐系统技术应用的具体指向。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T00:32:07">2025-10-09 00:32:07</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07644v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07644v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The modern information environment (MIE) is increasingly complex, shaped by a wide range of techniques designed to satisfy users' information needs. Information seeking (IS) models are effective mechanisms for characterizing user-system interactions. However, conceptualizing a model that fully captures the MIE landscape poses a challenge. We argue: Does such a model exist? To address this, we propose the Information Seeking in Modern Information Environments (ISMIE) framework as a fundamental step. ISMIE conceptualizes the information seeking process (ISP) via three key concepts: Components (e.g., Information Seeker), Intervening Variables (e.g., Interactive Variables), and Activities (e.g., Acquiring). Using ISMIE's concepts and employing a case study based on a common scenario - misinformation dissemination - we analyze six existing IS and information retrieval (IR) models to illustrate their limitations and the necessity of ISMIE. We then show how ISMIE serves as an actionable framework for both characterization and experimental design. We characterize three pressing issues and then outline two research blueprints: a user-centric, industry-driven experimental design for the authenticity and trust crisis to AI-generated content and a system-oriented, academic-driven design for tackling dopamine-driven content consumption. Our framework offers a foundation for developing IS and IR models to advance knowledge on understanding human interactions and system design in MIEs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08513v1" target="_blank" rel="noopener noreferrer">
                SliceFine：预训练网络的通用获胜切片假设
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SliceFine: The Universal Winning-Slice Hypothesis for Pretrained Networks
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Md Kowsher, Ali O. Polat, Ehsan Mohammady Ardehaly, Mehrdad Salehi, Zia Ghiasi, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文似乎涉及预训练网络的切片分析或优化，可能属于Transformer架构效率或模型分析领域。如果该技术能够识别和优化推荐/搜索系统中最重要的数据切片或用户群体，可能对个性化推荐或广告定向有潜在应用价值，但仅从标题难以确定具体技术细节和应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:45:28">2025-10-09 17:45:28</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08513v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08513v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper presents a theoretical framework explaining why fine tuning small, randomly selected subnetworks (slices) within pre trained models can be sufficient for downstream adaptation. We prove that pretrained networks exhibit a universal winning slice property arising from two phenomena: (1) spectral balance the eigenspectra of different weight matrix slices are remarkably similar; and (2) high task energy their backbone representations retain rich, task relevant features. This leads to the Universal Winning Slice Hypothesis, which provides a theoretical foundation for parameter efficient fine tuning (PEFT) in large scale models. Inspired by this, we propose SliceFine, a PEFT method that exploits this inherent redundancy by updating only selected slices of the original weights introducing zero new parameters, unlike adapter-based approaches. Empirically, SliceFine matches the performance of state of the art PEFT methods across language and vision tasks, while significantly improving training speed, memory efficiency, and model compactness. Our work bridges theory and practice, offering a theoretically grounded alternative to existing PEFT techniques.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08510v1" target="_blank" rel="noopener noreferrer">
                下沉与否：大型视觉语言模型中的视觉信息通路
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            To Sink or Not to Sink: Visual Information Pathways in Large Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiayun Luo, Wan-Cyuan Fan, Lyuyang Wang, Xiangteng He, Tanzila Rahman, Purang Ab...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究视觉语言模型中的视觉信息处理机制，属于VLM技术范畴。虽然VLM架构对处理异构数据有启发意义，但论文标题聚焦于纯粹的视觉信息通路分析，缺乏明确的推荐系统、搜索或广告应用连接点，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:44:42">2025-10-09 17:44:42</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08510v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08510v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Vision Language Models (LVLMs) have recently emerged as powerful architectures capable of understanding and reasoning over both visual and textual information. These models typically rely on two key components: a Vision Transformer (ViT) and a Large Language Model (LLM). ViT encodes visual content into a sequence of image tokens and serves as the perceptual front-end -- the eyes of the model. In contrast, the LLM interprets these tokens to perform high-level reasoning, generates responses, and functions as the cognitive core -- the brain of the model. However, it remains unclear which visual tokens contribute most significantly to understanding and reasoning, and how effectively these signals are propagated from ViT to the LLM. While most existing works have focused on identifying attention sinks, low-semantic tokens receiving disproportionately high attention, within the LLM, we shift the focus to the vision encoder by identifying a class of high-norm visual tokens from ViT, referred to as ViT attention sinks -- a problem that has been rarely studied but is indeed very important for LVLMs. Our findings show that these ViT sinks encapsulate high-level semantic concepts from images, allowing the LLM to perform more effective understanding and reasoning. Despite their importance, these sink tokens are often overlooked in existing LVLM architectures. To explore their contribution, we present both qualitative and quantitative analyses of the information embedded in these sink tokens. We also propose both training-free and training-based approaches to better leverage how this information is interpreted by the LLM, and to what extent. By explicitly utilizing these tokens, we demonstrate substantial improvements across a range of LVLMs and visual reasoning tasks, highlighting the untapped potential of ViT attention sinks in enhancing visual reasoning.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Editor-generated header carried over from normal_paper_template.html; the markup after this comment is one rendered paper card.
-->
<!-- Paper card. The page style block sets ".collapsed-level-1 .paper-details" to display: none, so only the header row shows while collapsed. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08483v1" target="_blank" rel="noopener noreferrer">
                DeepPrune：消除轨迹间冗余的并行扩展方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language change inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DeepPrune: Parallel Scaling without Inter-trace Redundancy
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shangqing Tu, Yaxuan Li, Yushi Bai, Lei Hou, Juanzi Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了模型效率优化技术，可能涉及并行计算或模型压缩，这属于Transformer架构效率改进的范畴。然而，标题信息有限，无法明确其具体技术细节或与推荐系统、搜索、广告的直接应用潜力，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:24:54">2025-10-09 17:24:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08483v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08483v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Parallel scaling has emerged as a powerful paradigm to enhance reasoning capabilities in large language models (LLMs) by generating multiple Chain-of-Thought (CoT) traces simultaneously. However, this approach introduces significant computational inefficiency due to inter-trace redundancy -- our analysis reveals that over 80% of parallel reasoning traces yield identical final answers, representing substantial wasted computation. To address this critical efficiency bottleneck, we propose DeepPrune, a novel framework that enables efficient parallel scaling through dynamic pruning. Our method features a specialized judge model trained with focal loss and oversampling techniques to accurately predict answer equivalence from partial reasoning traces which realizes 0.87 AUROC on equivalence prediction, combined with an online greedy clustering algorithm that dynamically prunes redundant paths while preserving answer diversity. Comprehensive evaluations across three challenging benchmarks (AIME 2024, AIME 2025, and GPQA) and multiple reasoning models demonstrate that DeepPrune achieves remarkable token reduction by over 80% compared to conventional consensus sampling on most cases, while maintaining competitive accuracy within 3 percentage points. Our work establishes a new standard for efficient parallel reasoning, making high-performance reasoning more efficient. Our code and data are here: https://deepprune.github.io/
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08163v1" target="_blank" rel="noopener noreferrer">
                ARM2：具有视觉理解与可执行代码的自适应推理模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ARM2: Adaptive Reasoning Model with Vision Understanding and Executable Code
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jian Xie, Zhendong Chu, Aoxiao Zhong, Kai Zhang, Mingzhe Han, Xin Fang, Jialie S...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉理解和代码执行能力，属于多模态推理范畴。虽然自适应推理机制在概念上可能对推荐系统的复杂决策有启发，但论文的核心焦点是视觉模态和代码执行，与推荐系统、搜索或广告的核心技术栈关联较弱，缺乏明确的直接应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T12:49:34">2025-10-09 12:49:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08163v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08163v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Reasoning Models (LRMs) often suffer from the “over-thinking” problem, generating unnecessarily long reasoning on simple tasks. Some strategies have been proposed to mitigate this issue, such as length penalties or routing mechanisms, but they are typically heuristic and task-specific, lacking a general framework for adaptive reasoning. In this paper, we present ARM2, a unified model that adaptively balances reasoning performance and efficiency across multiple formats through a reinforcement learning framework augmented with length-aware optimization. Beyond conventional natural language inference, ARM2 integrates vision understanding, extending its applicability to multimodal. Moreover, ARM2 integrates executable code into reasoning, enabling substantial reductions in token cost while preserving task performance compared to long CoT. Experiments demonstrate that ARM2 achieves performance on par with traditional reasoning models trained with GRPO, while reducing token usage by over 70% on average. We further conduct extensive analyses to validate the effectiveness of ARM2 and the soundness of its design.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08149v1" target="_blank" rel="noopener noreferrer">
                AI知识助手：一种为对话式AI代理创建知识库的自动化方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AI Knowledge Assist: An Automated Approach for the Creation of Knowledge Bases for Conversational AI Agents
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Md Tahmid Rahman Laskar, Julien Bouvier Tremblay, Xue-Yong Fu, Cheng Chen, Shash...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注对话式AI代理的知识库创建，属于AIGC和内容生成范畴，与推荐系统、搜索或广告的核心技术关联较弱。虽然知识库构建可能间接支持某些推荐场景，但论文焦点不在排名、检索或个性化等核心领域，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T12:34:31">2025-10-09 12:34:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08149v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08149v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The utilization of conversational AI systems by leveraging Retrieval Augmented Generation (RAG) techniques to solve customer problems has been on the rise with the rapid progress of Large Language Models (LLMs). However, the absence of a company-specific dedicated knowledge base is a major barrier to the integration of conversational AI systems in contact centers. To this end, we introduce AI Knowledge Assist, a system that extracts knowledge in the form of question-answer (QA) pairs from historical customer-agent conversations to automatically build a knowledge base. Fine-tuning a lightweight LLM on internal data demonstrates state-of-the-art performance, outperforming larger closed-source LLMs. More specifically, empirical evaluation on 20 companies demonstrates that the proposed AI Knowledge Assist system that leverages the LLaMA-3.1-8B model eliminates the cold-start gap in contact centers by achieving above 90% accuracy in answering information-seeking questions. This enables immediate deployment of RAG-powered chatbots.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08145v1" target="_blank" rel="noopener noreferrer">
                通过基于群体的投票缓解大型语言模型中的判断偏好偏差
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Mitigating Judgment Preference Bias in Large Language Models through Group-Based Polling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuliang Liu, Zhipeng Xu, Zhenghao Liu, Yukun Yan, Minghe Yu, Yu Gu, Chong Chen,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的偏好偏差缓解，属于LLM评估和偏差校正范畴，与我的核心关注点（推荐系统、搜索、广告的直接应用或使能技术）相关性较弱。虽然偏差缓解可能间接影响推荐/搜索系统的公平性，但论文焦点更偏向通用LLM评估而非特定领域应用，且未明确涉及异构数据建模或Transformer架构创新。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T12:32:31">2025-10-09 12:32:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08145v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08145v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) as automatic evaluators, commonly referred to as LLM-as-a-Judge, have also attracted growing attention. This approach plays a vital role in aligning LLMs with human judgments, providing accurate and reliable assessments. However, LLM-based judgment models often exhibit judgment preference bias during the evaluation phase, tending to favor responses generated by themselves, undermining the reliability of their judgments. This paper introduces the Group-Based Polling Optimization (Genii), an unsupervised multi-agent collaborative optimization framework that mitigates the inherent judgment preference bias of judgment models. Specifically, Genii integrates various LLM-based judgment models into a multi-agent system and simulates the interactive client-server polling mechanism to optimize each client agent unsupervisedly. Our experiments demonstrate that Genii outperforms supervised models trained on annotated judgment data, while requiring no human-labeled annotations. Genii consistently improves performance across different client agents during the polling, even when weaker models act as server agents. Further analysis reveals that Genii effectively mitigates judgment preference bias of LLM-based judgment models, demonstrating its effectiveness. All codes are available at https://github.com/NEUIR/Genii.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08081v1" target="_blank" rel="noopener noreferrer">
                AutoQual：一种用于自动发现可解释特征以进行评论质量评估的LLM智能体
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AutoQual: An LLM Agent for Automated Discovery of Interpretable Features for Review Quality Assessment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiaochong Lan, Jie Feng, Yinxing Liu, Xinlei Shi, Yong Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及LLM在内容质量评估中的应用，这与搜索和推荐系统中的内容理解有一定关联。然而，评论质量评估更偏向内容质量分析而非核心的推荐、搜索或广告排名任务，应用范围相对有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T11:11:02">2025-10-09 11:11:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08081v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08081v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Ranking online reviews by their intrinsic quality is a critical task for e-commerce platforms and information services, impacting user experience and business outcomes. However, quality is a domain-dependent and dynamic concept, making its assessment a formidable challenge. Traditional methods relying on hand-crafted features are unscalable across domains and fail to adapt to evolving content patterns, while modern deep learning approaches often produce black-box models that lack interpretability and may prioritize semantics over quality. To address these challenges, we propose AutoQual, an LLM-based agent framework that automates the discovery of interpretable features. While demonstrated on review quality assessment, AutoQual is designed as a general framework for transforming tacit knowledge embedded in data into explicit, computable features. It mimics a human research process, iteratively generating feature hypotheses through reflection, operationalizing them via autonomous tool implementation, and accumulating experience in a persistent memory. We deploy our method on a large-scale online platform with a billion-level user base. Large-scale A/B testing confirms its effectiveness, increasing average reviews viewed per user by 0.79% and the conversion rate of review readers by 0.27%.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08002v1" target="_blank" rel="noopener noreferrer">
                在职学习：面向长周期任务的经验驱动自进化智能体
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Learning on the Job: An Experience-Driven Self-Evolving Agent for Long-Horizon Tasks
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cheng Yang, Xuemeng Yang, Licheng Wen, Daocheng Fu, Jianbiao Mei, Rong Wu, Pinlo...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注智能体的持续学习和自我进化能力，属于通用AI智能体领域。虽然经验驱动的学习机制在推荐系统中可能有潜在应用（如用户行为建模的持续优化），但论文聚焦于长周期任务而非具体的推荐、搜索或广告场景，与核心关注点关联较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T09:40:34">2025-10-09 09:40:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08002v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08002v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models have demonstrated remarkable capabilities across diverse domains, yet significant challenges persist when deploying them as AI agents for real-world long-horizon tasks. Existing LLM agents suffer from a critical limitation: they are test-time static and cannot learn from experience, lacking the ability to accumulate knowledge and continuously improve on the job. To address this challenge, we propose MUSE, a novel agent framework that introduces an experience-driven, self-evolving system centered around a hierarchical Memory Module. MUSE organizes diverse levels of experience and leverages them to plan and execute long-horizon tasks across multiple applications. After each sub-task execution, the agent autonomously reflects on its trajectory, converting the raw trajectory into structured experience and integrating it back into the Memory Module. This mechanism enables the agent to evolve beyond its static pretrained parameters, fostering continuous learning and self-evolution. We evaluate MUSE on the long-horizon productivity benchmark TAC. It achieves new SOTA performance by a significant margin using only a lightweight Gemini-2.5 Flash model. Sufficient Experiments demonstrate that as the agent autonomously accumulates experience, it exhibits increasingly superior task completion capabilities, as well as robust continuous learning and self-evolution capabilities. Moreover, the accumulated experience from MUSE exhibits strong generalization properties, enabling zero-shot improvement on new tasks. MUSE establishes a new paradigm for AI agents capable of real-world productivity task automation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07821v1" target="_blank" rel="noopener noreferrer">
                从关键词到聚类：基于人工智能的YouTube评论分析揭示2024年选举议题显著性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            From Keywords to Clusters: AI-Driven Analysis of YouTube Comments to Reveal Election Issue Salience in 2024
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Raisa M. Simoes, Timoteo Kelly, Eduardo J. Simoes, Praveen Rao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及AI驱动的文本分析，但其焦点是政治选举议题分析而非推荐系统、搜索或广告的核心技术。该方法可能间接应用于内容理解，但缺乏对RecSys/Search/Ads架构、Transformer改进或LLM应用的直接贡献。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T06:02:10">2025-10-09 06:02:10</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07821v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07821v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.SI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper aims to explore two competing data science methodologies to attempt answering the question, "Which issues contributed most to voters' choice in the 2024 presidential election?" The methodologies involve novel empirical evidence driven by artificial intelligence (AI) techniques. By using two distinct methods based on natural language processing and clustering analysis to mine over eight thousand user comments on election-related YouTube videos from one right leaning journal, Wall Street Journal, and one left leaning journal, New York Times, during pre-election week, we quantify the frequency of selected issue areas among user comments to infer which issues were most salient to potential voters in the seven days preceding the November 5th election. Empirically, we primarily demonstrate that immigration and democracy were the most frequently and consistently invoked issues in user comments on the analyzed YouTube videos, followed by the issue of identity politics, while inflation was significantly less frequently referenced. These results corroborate certain findings of post-election surveys but also refute the supposed importance of inflation as an election issue. This indicates that variations on opinion mining, with their analysis of raw user data online, can be more revealing than polling and surveys for analyzing election outcomes.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07776v1" target="_blank" rel="noopener noreferrer">
                基于标签知识传播的实例关系学习网络用于少样本多标签意图检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Instance Relation Learning Network with Label Knowledge Propagation for Few-shot Multi-label Intent Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shiman Zhao, Shangyuan Li, Wei Chen, Tengjiao Wang, Jiahui Yao, Jiabin Zheng, Ka...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注少样本多标签意图检测，这属于对话系统和NLP领域的特定任务，与推荐系统、搜索或广告的核心领域进展没有直接关联。虽然意图检测在搜索查询理解中有潜在应用，但论文重点在于少样本学习和多标签分类技术，而非直接应用于推荐、搜索或广告系统的核心排名或建模问题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T04:47:06">2025-10-09 04:47:06</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07776v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07776v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Few-shot Multi-label Intent Detection (MID) is crucial for dialogue systems, aiming to detect multiple intents of utterances in low-resource dialogue domains. Previous studies focus on a two-stage pipeline. They first learn representations of utterances with multiple labels and then use a threshold-based strategy to identify multi-label results. However, these methods rely on representation classification and ignore instance relations, leading to error propagation. To solve the above issues, we propose a multi-label joint learning method for few-shot MID in an end-to-end manner, which constructs an instance relation learning network with label knowledge propagation to eliminate error propagation. Concretely, we learn the interaction relations between instances with class information to propagate label knowledge between a few labeled (support set) and unlabeled (query set) instances. With label knowledge propagation, the relation strength between instances directly indicates whether two utterances belong to the same intent for multi-label prediction. Besides, a dual relation-enhanced loss is developed to optimize support- and query-level relation strength to improve performance. Experiments show that we outperform strong baselines by an average of 9.54% AUC and 11.19% Macro-F1 in 1-shot scenarios.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to the alphaXiv page, and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07761v1" target="_blank" rel="noopener noreferrer">
                测试时推理器是策略性多项选择题答题者
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides it by default while the card has the collapsed-level-1 class. -->
    <div class="paper-details">
        <!-- Original title; lang="en" marks the switch from the page language (zh-CN). -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Test-Time Reasoners Are Strategic Multiple-Choice Test-Takers
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nishant Balepur, Atrey Desai, Rachel Rudinger
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注LLM在测试时的推理策略和多项选择题处理能力，这属于核心LLM技术的进步。虽然测试时推理优化可能间接提升推荐或搜索系统中LLM的决策质量，但论文焦点是通用的测试策略而非具体的RecSys/Search/Ads应用，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, categories. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime carries no UTC offset because the page does not state the time zone. -->
            <time datetime="2025-10-09T04:00:09">2025-10-09 04:00:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07761v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07761v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) now give reasoning before answering, excelling in tasks like multiple-choice question answering (MCQA). Yet, a concern is that LLMs do not solve MCQs as intended, as work finds LLMs sans reasoning succeed in MCQA without using the question, i.e., choices-only. Such partial-input success is often deemed problematic, but reasoning traces could reveal if these strategies are truly shallow in choices-only settings. To study these strategies, reasoning LLMs solve MCQs in full and choices-only inputs; test-time reasoning often boosts accuracy on full and in choices-only half the time. While possibly due to shallow shortcuts, choices-only success is barely affected by the length of reasoning traces, and after finding traces pass faithfulness tests, we show they use less problematic strategies like inferring missing questions. In all, we challenge claims that partial-input success is always a flaw, so we discuss how reasoning traces could separate problematic data from less problematic reasoning.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07736v1" target="_blank" rel="noopener noreferrer">
                基于高效多语言知识共享的多语言知识图谱补全
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Multilingual Knowledge Graph Completion via Efficient Multilingual Knowledge Sharing
        </div>

        <!-- Author list, truncated with an ellipsis. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cunli Mao, Xiaofei Gao, Ran Song, Shizhu He, Shengxiang Gao, Kang Liu, Zhengtao ...
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多语言知识图谱补全，属于通用知识图谱领域，与推荐系统、搜索或广告的核心进展没有直接关联。虽然知识图谱技术可以间接支持这些领域的知识增强，但论文标题未表明其专注于推荐、搜索或广告应用，也未涉及LLM、Transformer架构或异构数据统一建模等关键技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T03:19:21">2025-10-09 03:19:21</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07736v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07736v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) based Multilingual Knowledge Graph Completion (MKGC) aim to predict missing facts by leveraging LLMs' multilingual understanding capabilities, improving the completeness of multilingual knowledge graphs (KGs). However, existing MKGC research underutilizes the multilingual capabilities of LLMs and ignores the shareability of cross-lingual knowledge. In this paper, we propose a novel MKGC framework that leverages multilingual shared knowledge to significantly enhance performance through two components: Knowledge-level Grouped Mixture of Experts (KL-GMoE) and Iterative Entity Reranking (IER). KL-GMoE efficiently models shared knowledge, while IER significantly enhances its utilization. To evaluate our framework, we constructed a mKG dataset containing 5 languages and conducted comprehensive comparative experiments with existing state-of-the-art (SOTA) MKGC method. The experimental results demonstrate that our framework achieves improvements of 5.47%, 3.27%, and 1.01% in the Hits@1, Hits@3, and Hits@10 metrics, respectively, compared with SOTA MKGC method. Further experimental analysis revealed the properties of knowledge sharing in settings of unseen and unbalanced languages. We have released the dataset and code for our work on https://github.com/gaoxiaofei07/KL-GMoE.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08431v1" target="_blank" rel="noopener noreferrer">
                基于分数正则化连续时间一致性的大规模扩散蒸馏
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Large Scale Diffusion Distillation via Score-Regularized Continuous-Time Consistency
        </div>

        <!-- Author list, truncated with an ellipsis. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kaiwen Zheng, Yuji Wang, Qianli Ma, Huayu Chen, Jintao Zhang, Yogesh Balaji, Jia...
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注扩散模型的蒸馏技术，属于生成模型效率优化领域。虽然扩散模型在内容生成方面有应用，但该技术本身是通用的模型压缩方法，与推荐系统、搜索或广告的核心排序和匹配问题关联较弱。在推荐/搜索场景中，潜在的间接应用可能包括生成式推荐中的模型加速，但这不是直接相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:45:30">2025-10-09 16:45:30</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08431v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08431v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This work represents the first effort to scale up continuous-time consistency distillation to general application-level image and video diffusion models. Although continuous-time consistency model (sCM) is theoretically principled and empirically powerful for accelerating academic-scale diffusion, its applicability to large-scale text-to-image and video tasks remains unclear due to infrastructure challenges in Jacobian-vector product (JVP) computation and the limitations of standard evaluation benchmarks. We first develop a parallelism-compatible FlashAttention-2 JVP kernel, enabling sCM training on models with over 10 billion parameters and high-dimensional video tasks. Our investigation reveals fundamental quality limitations of sCM in fine-detail generation, which we attribute to error accumulation and the "mode-covering" nature of its forward-divergence objective. To remedy this, we propose the score-regularized continuous-time consistency model (rCM), which incorporates score distillation as a long-skip regularizer. This integration complements sCM with the "mode-seeking" reverse divergence, effectively improving visual quality while maintaining high generation diversity. Validated on large-scale models (Cosmos-Predict2, Wan2.1) up to 14B parameters and 5-second videos, rCM matches or surpasses the state-of-the-art distillation method DMD2 on quality metrics while offering notable advantages in diversity, all without GAN tuning or extensive hyperparameter searches. The distilled models generate high-fidelity samples in only $1\sim4$ steps, accelerating diffusion sampling by $15\times\sim50\times$. These results position rCM as a practical and theoretically grounded framework for advancing large-scale diffusion distillation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08178v1" target="_blank" rel="noopener noreferrer">
                通过自举数据重对齐实现鲁棒规范化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Robust Canonicalization through Bootstrapped Data Re-Alignment
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Johann Schmidt, Sebastian Stober
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了数据对齐和规范化技术，这在推荐系统和搜索中可能用于处理异构数据源或特征工程。然而，标题过于宽泛，没有明确说明与Transformer架构、LLM技术或推荐/搜索/广告系统的直接关联，潜在应用不明确。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:05:20">2025-10-09 13:05:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08178v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08178v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Fine-grained visual classification (FGVC) tasks, such as insect and bird identification, demand sensitivity to subtle visual cues while remaining robust to spatial transformations. A key challenge is handling geometric biases and noise, such as different orientations and scales of objects. Existing remedies rely on heavy data augmentation, which demands powerful models, or on equivariant architectures, which constrain expressivity and add cost. Canonicalization offers an alternative by shielding such biases from the downstream model. In practice, such functions are often obtained using canonicalization priors, which assume aligned training data. Unfortunately, real-world datasets never fulfill this assumption, causing the obtained canonicalizer to be brittle. We propose a bootstrapping algorithm that iteratively re-aligns training samples by progressively reducing variance and recovering the alignment assumption. We establish convergence guarantees under mild conditions for arbitrary compact groups, and show on four FGVC benchmarks that our method consistently outperforms equivariant, and canonicalization baselines while performing on par with augmentation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08003v1" target="_blank" rel="noopener noreferrer">
                CIR-CoT：通过端到端思维链推理实现可解释的组合图像检索
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CIR-CoT: Towards Interpretable Composed Image Retrieval via End-to-End Chain-of-Thought Reasoning
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Weihuang Lin, Yiwei Ma, Jiayi Ji, Xiaoshuai Sun, Rongrong Ji
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然该论文涉及检索系统，但主要关注组合图像检索这一视觉领域特定任务，与推荐系统、搜索或广告的核心进展关联有限。思维链推理技术可能对理解用户意图有潜在价值，但论文的直接应用场景过于偏向视觉检索而非文本或异构数据建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T09:41:45">2025-10-09 09:41:45</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08003v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08003v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. The LaTeX-style opening quote marks around "black boxes" were replaced with typographic quotes so no stray backticks are rendered. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Composed Image Retrieval (CIR), which aims to find a target image from a reference image and a modification text, presents the core challenge of performing unified reasoning across visual and semantic modalities. While current approaches based on Vision-Language Models (VLMs, e.g., CLIP) and more recent Multimodal Large Language Models (MLLMs, e.g., Qwen-VL) have shown progress, they predominantly function as “black boxes.” This inherent opacity not only prevents users from understanding the retrieval rationale but also restricts the models' ability to follow complex, fine-grained instructions. To overcome these limitations, we introduce CIR-CoT, the first end-to-end retrieval-oriented MLLM designed to integrate explicit Chain-of-Thought (CoT) reasoning. By compelling the model to first generate an interpretable reasoning chain, CIR-CoT enhances its ability to capture crucial cross-modal interactions, leading to more accurate retrieval while making its decision process transparent. Since existing datasets like FashionIQ and CIRR lack the necessary reasoning data, a key contribution of our work is the creation of structured CoT annotations using a three-stage process involving a caption, reasoning, and conclusion. Our model is then fine-tuned to produce this structured output before encoding its final retrieval intent into a dedicated embedding. Comprehensive experiments show that CIR-CoT achieves highly competitive performance on in-domain datasets (FashionIQ, CIRR) and demonstrates remarkable generalization on the out-of-domain CIRCO dataset, establishing a new path toward more effective and trustworthy retrieval systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08558v1" target="_blank" rel="noopener noreferrer">
                智能体通过早期经验学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Agent Learning via Early Experience
        </div>

        <!-- Author list, truncated with an ellipsis. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kai Zhang, Xiangchao Chen, Bo Liu, Tianci Xue, Zeyi Liao, Zhihan Liu, Xiyao Wang...
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于智能体学习机制，主要涉及强化学习或智能体训练方法，属于通用AI技术范畴。虽然智能体技术可能间接应用于推荐系统或搜索的交互优化，但标题未明确显示与推荐系统、搜索、广告或相关使能技术（如Transformer、LLM应用）的直接关联，且未提及多模态数据处理等具体应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:17">2025-10-09 17:59:17</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08558v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08558v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                A long-term goal of language agents is to learn and improve through their own experience, ultimately outperforming humans in complex, real-world tasks. However, training agents from experience data with reinforcement learning remains difficult in many environments, which either lack verifiable rewards (e.g., websites) or require inefficient long-horizon rollouts (e.g., multi-turn tool use). As a result, most current agents rely on supervised fine-tuning on expert data, which is challenging to scale and generalizes poorly. This limitation stems from the nature of expert demonstrations: they capture only a narrow range of scenarios and expose the agent to limited environment diversity. We address this limitation with a middle-ground paradigm we call early experience: interaction data generated by the agent's own actions, where the resulting future states serve as supervision without reward signals. Within this paradigm we study two strategies of using such data: (1) Implicit world modeling, which uses collected states to ground the policy in environment dynamics; and (2) Self-reflection, where the agent learns from its suboptimal actions to improve reasoning and decision-making. We evaluate across eight diverse environments and multiple model families. Our approaches consistently improve effectiveness and out-of-domain generalization, highlighting the value of early experience. Moreover, in environments with verifiable rewards, our results provide promising signals that early experience offers a strong foundation for subsequent reinforcement learning, positioning it as a practical bridge between imitation learning and fully experience-driven agents.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08385v1" target="_blank" rel="noopener noreferrer">
                使用GPT-4o和上下文学习检测历史地图上的图例项
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Detecting Legend Items on Historical Maps Using GPT-4o with In-Context Learning
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sofia Kirsanova, Yao-Yi Chiang, Weiwei Duan
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要涉及计算机视觉任务（历史地图分析）和LLM的特定应用，与推荐系统、搜索或广告的核心领域进展无关。虽然使用了GPT-4o，但应用场景（历史地图图例检测）在RecSys/Search/Ads领域没有明显的实际应用潜力，属于纯粹的视觉应用范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:08:48">2025-10-09 16:08:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08385v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08385v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.DB</span><span class="category-tag">cs.IR</span><span class="category-tag">H.2.8; H.3.3; I.2.10; I.4.8</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Historical map legends are critical for interpreting cartographic symbols. However, their inconsistent layouts and unstructured formats make automatic extraction challenging. Prior work focuses primarily on segmentation or general optical character recognition (OCR), with few methods effectively matching legend symbols to their corresponding descriptions in a structured manner. We present a method that combines LayoutLMv3 for layout detection with GPT-4o using in-context learning to detect and link legend items and their descriptions via bounding box predictions. Our experiments show that GPT-4 with structured JSON prompts outperforms the baseline, achieving 88% F-1 and 85% IoU, and reveal how prompt design, example counts, and layout alignment affect performance. This approach supports scalable, layout-aware legend parsing and improves the indexing and searchability of historical maps across various visual styles.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08543v1" target="_blank" rel="noopener noreferrer">
                VideoNorms：视频语言模型文化意识基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the page rule `.collapsed-level-1 .paper-details { display: none }` while the card is collapsed. -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document is declared zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            VideoNorms: Benchmarking Cultural Awareness of Video Language Models
        </div>

        <!-- Author list, truncated with an ellipsis. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nikhil Reddy Varimalla, Yunfei Xu, Arkadiy Saakyan, Meng Fan Wang, Smaranda Mure...
        </div>

        <!-- Recommendation rationale shown under the score. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于视频语言模型的文化意识基准测试，属于纯粹的评估基准研究范畴，与我的核心关注点无关。虽然涉及多模态模型，但主要关注文化评估这一特定NLP任务，缺乏在推荐系统、搜索或广告领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The timestamp carries no UTC offset, so datetime is a local date and time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:54:55">2025-10-09 17:54:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08543v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08543v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As Video Large Language Models (VideoLLMs) are deployed globally, they require understanding of and grounding in the relevant cultural background. To properly assess these models' cultural awareness, adequate benchmarks are needed. We introduce VideoNorms, a benchmark of over 1000 (video clip, norm) pairs from US and Chinese cultures annotated with socio-cultural norms grounded in speech act theory, norm adherence and violations labels, and verbal and non-verbal evidence. To build VideoNorms, we use a human-AI collaboration framework, where a teacher model using theoretically-grounded prompting provides candidate annotations and a set of trained human experts validate and correct the annotations. We benchmark a variety of open-weight VideoLLMs on the new dataset which highlight several common trends: 1) models performs worse on norm violation than adherence; 2) models perform worse w.r.t Chinese culture compared to the US culture; 3) models have more difficulty in providing non-verbal evidence compared to verbal for the norm adhere/violation label and struggle to identify the exact norm corresponding to a speech-act; and 4) unlike humans, models perform worse in formal, non-humorous contexts. Our findings emphasize the need for culturally-grounded video language model training - a gap our benchmark and framework begin to address.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a regular ("normal") paper card: linked title, score badge, and a collapsible details section with the abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08531v1" target="_blank" rel="noopener noreferrer">
                SpatialLadder：视觉语言模型中空间推理的渐进式训练
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            SpatialLadder: Progressive Training for Spatial Reasoning in Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hongxing Li, Dingming Li, Zixuan Wang, Yuchen Yan, Hang Wu, Wenqi Zhang, Yonglia...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言模型中的空间推理能力提升，属于纯粹的视觉-语言多模态研究。虽然标题提及了渐进式训练方法，但核心应用场景是空间理解而非推荐系统、搜索或广告领域。这种空间推理技术缺乏明确的路径应用于异构数据处理或推荐/搜索场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:50:54">2025-10-09 17:50:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08531v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08531v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Spatial reasoning remains a fundamental challenge for Vision-Language Models (VLMs), with current approaches struggling to achieve robust performance despite recent advances. We identify that this limitation stems from a critical gap: existing methods attempt to learn spatial reasoning directly without establishing the hierarchical foundations of perception and understanding. To address this challenge, we present a comprehensive methodology for building spatial intelligence progressively. We introduce SpatialLadder-26k, a multimodal dataset containing 26,610 samples spanning object localization, single image, multi-view, and video spatial reasoning tasks, constructed through a standardized pipeline that ensures systematic coverage across modalities. Building on this dataset, we design a three-stage progressive training framework that (1) establishes spatial perception through object localization, (2) develops spatial understanding through multi-dimensional spatial tasks, and (3) strengthens complex reasoning via reinforcement learning with verifiable rewards. This approach yields SpatialLadder, a 3B-parameter model that achieves state-of-the-art performance on spatial reasoning benchmarks, with 23.4% average improvement over the base model, surpassing GPT-4o by 20.8% and Gemini-2.0-Flash by 10.1%. Notably, SpatialLadder maintains strong generalization with 7.2% improvement on out-of-domain benchmarks, demonstrating that progressive training from perception to reasoning is essential for robust spatial intelligence.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08529v1" target="_blank" rel="noopener noreferrer">
                CoMAS：通过交互奖励共同演化的多智能体系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            CoMAS: Co-Evolving Multi-Agent Systems via Interaction Rewards
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiangyuan Xue, Yifan Zhou, Guibin Zhang, Zaibin Zhang, Yijiang Li, Chen Zhang, Z...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注多智能体系统的共同演化机制，属于强化学习领域。虽然多智能体系统在理论上可能应用于推荐系统中的多智能体协同，但论文标题未明确显示与推荐、搜索或广告系统的直接关联，也未涉及LLM、Transformer架构或异构数据建模等核心技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:50:26">2025-10-09 17:50:26</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08529v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08529v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Self-evolution is a central research topic in enabling large language model (LLM)-based agents to continually improve their capabilities after pretraining. Recent research has witnessed a transition from reinforcement learning (RL)-free to RL-based methods. Current RL-based methods either rely on dense external reward signals or extract intrinsic reward signals from LLMs themselves. However, these approaches diverge from the self-evolution mechanisms observed in human intelligence, where individuals learn and improve through mutual discussion and collaboration. In this work, we introduce Co-Evolving Multi-Agent Systems (CoMAS), a novel framework that enables agents to improve autonomously by learning from inter-agent interactions without external supervision. CoMAS generates intrinsic rewards from rich discussion dynamics, employs an LLM-as-a-judge mechanism to formulate these rewards, and optimizes each agent's policy through RL, thereby enabling decentralized and scalable co-evolution. Experimental results demonstrate that CoMAS consistently outperforms untrained agents and achieves state-of-the-art performance across most evaluation settings. Ablation studies confirm the necessity of interaction-based reward signals and reveal promising scalability as the number and diversity of agents increase. These findings establish CoMAS as a novel and effective paradigm for self-evolution in LLM-based agents.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08524v1" target="_blank" rel="noopener noreferrer">
                基于代理提示评估器的高效法律文本分类提示优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Efficient Prompt Optimisation for Legal Text Classification with Proxy Prompt Evaluator
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hyunji Lee, Kevin Chenhao Li, Matthias Grabmair, Shanshan Xu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注法律领域的文本分类提示优化，属于特定领域应用而非核心推荐系统、搜索或广告技术。虽然涉及提示优化技术，但缺乏与异构数据建模、Transformer架构改进或直接应用于推荐/搜索/广告系统的明确联系。代理评估器方法可能对效率优化有启发，但法律领域的特定性限制了其在目标领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:49:53">2025-10-09 17:49:53</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08524v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08524v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Prompt optimization aims to systematically refine prompts to enhance a language model's performance on specific tasks. Fairness detection in Terms of Service (ToS) clauses is a challenging legal NLP task that demands carefully crafted prompts to ensure reliable results. However, existing prompt optimization methods are often computationally expensive due to inefficient search strategies and costly prompt candidate scoring. In this paper, we propose a framework that combines Monte Carlo Tree Search (MCTS) with a proxy prompt evaluator to more effectively explore the prompt space while reducing evaluation costs. Experiments demonstrate that our approach achieves higher classification accuracy and efficiency than baseline methods under a constrained computation budget.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08506v1" target="_blank" rel="noopener noreferrer">
                新词学习用于可控性和自我言语化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Neologism Learning for Controllability and Self-Verbalization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>John Hewitt, Oyvind Tafjord, Robert Geirhos, Been Kim
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的新词学习和自我言语化能力，属于纯粹的NLP中心主题，与推荐系统、搜索或广告的核心技术进展没有直接关联。虽然可控性可能对内容生成有一定意义，但论文焦点不在推荐/搜索/广告领域的实际应用，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:41:57">2025-10-09 17:41:57</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08506v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08506v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Humans invent new words when there is a rising demand for a new useful concept (e.g., doomscrolling). We explore and validate a similar idea in our communication with LLMs: introducing new words to better understand and control the models, expanding on the recently introduced neologism learning. This method introduces a new word by adding a new word embedding and training with examples that exhibit the concept with no other changes in model parameters. We show that adding a new word allows for control of concepts such as flattery, incorrect answers, text length, as well as more complex concepts in AxBench. We discover that neologisms can also further our understanding of the model via self-verbalization: models can describe what each new word means to them in natural language, like explaining that a word that represents a concept of incorrect answers means ``a lack of complete, coherent, or meaningful answers...'' To validate self-verbalizations, we introduce plug-in evaluation: we insert the verbalization into the context of a model and measure whether it controls the target concept. In some self-verbalizations, we find machine-only synonyms: words that seem unrelated to humans but cause similar behavior in machines. Finally, we show how neologism learning can jointly learn multiple concepts in multiple words.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08388v1" target="_blank" rel="noopener noreferrer">
                若可能，则可接受？理解大型语言模型中的条件可接受性判断
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            If Probable, Then Acceptable? Understanding Conditional Acceptability Judgments in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jasmin Orth, Philipp Mondorf, Barbara Plank
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究LLM的条件可接受性判断，这属于模型行为分析和评估范畴，与您的核心关注点（推荐系统、搜索广告领域的核心进展、使能技术及应用）相关性较弱。虽然涉及LLM内部机制，但更偏向NLP评估和模型行为理解，而非您关注的使能技术或直接应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:12:10">2025-10-09 16:12:10</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08388v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08388v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (inline LaTeX markup is kept verbatim). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Conditional acceptability refers to how plausible a conditional statement is perceived to be. It plays an important role in communication and reasoning, as it influences how individuals interpret implications, assess arguments, and make decisions based on hypothetical scenarios. When humans evaluate how acceptable a conditional "If A, then B" is, their judgments are influenced by two main factors: the $\textit{conditional probability}$ of $B$ given $A$, and the $\textit{semantic relevance}$ of the antecedent $A$ given the consequent $B$ (i.e., whether $A$ meaningfully supports $B$). While prior work has examined how large language models (LLMs) draw inferences about conditional statements, it remains unclear how these models judge the $\textit{acceptability}$ of such statements. To address this gap, we present a comprehensive study of LLMs' conditional acceptability judgments across different model families, sizes, and prompting strategies. Using linear mixed-effects models and ANOVA tests, we find that models are sensitive to both conditional probability and semantic relevance-though to varying degrees depending on architecture and prompting style. A comparison with human data reveals that while LLMs incorporate probabilistic and semantic cues, they do so less consistently than humans. Notably, larger models do not necessarily align more closely with human judgments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08329v1" target="_blank" rel="noopener noreferrer">
                AutoRed：一种用于自动化红队测试的自由形式对抗性提示生成框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            AutoRed: A Free-form Adversarial Prompt Generation Framework for Automated Red Teaming
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Muxi Diao, Yutao Mou, Keqing He, Hanbo Song, Lulu Zhao, Shikun Zhang, Wei Ye, Ko...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注对抗性提示生成和红队测试，这属于LLM安全评估领域，与推荐系统、搜索或广告的核心技术进展关联较弱。虽然红队测试可能间接应用于评估推荐系统的鲁棒性，但论文焦点更偏向安全测试而非核心推荐/搜索算法改进，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T15:17:28">2025-10-09 15:17:28</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08329v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08329v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The safety of Large Language Models (LLMs) is crucial for the development of trustworthy AI applications. Existing red teaming methods often rely on seed instructions, which limits the semantic diversity of the synthesized adversarial prompts. We propose AutoRed, a free-form adversarial prompt generation framework that removes the need for seed instructions. AutoRed operates in two stages: (1) persona-guided adversarial instruction generation, and (2) a reflection loop to iteratively refine low-quality prompts. To improve efficiency, we introduce a verifier to assess prompt harmfulness without querying the target models. Using AutoRed, we build two red teaming datasets -- AutoRed-Medium and AutoRed-Hard -- and evaluate eight state-of-the-art LLMs. AutoRed achieves higher attack success rates and better generalization than existing baselines. Our results highlight the limitations of seed-based approaches and demonstrate the potential of free-form red teaming for LLM safety evaluation. We will open source our datasets in the near future.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08325v1" target="_blank" rel="noopener noreferrer">
                超越Pass@k：用于推理边界的广度-深度度量
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Pass@k: Breadth-Depth Metrics for Reasoning Boundaries
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Marius Dragoi, Ioana Pintilie, Florin Gogianu, Florin Brad
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM推理能力的评估指标，属于纯粹的NLP评估基准研究。虽然LLM推理能力可能间接影响推荐或搜索系统的性能，但论文本身没有明确涉及推荐系统、搜索或广告的直接应用，也不属于核心架构或使能技术进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T15:14:58">2025-10-09 15:14:58</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08325v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08325v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span><span class="category-tag">I.2.6; I.2.7</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reinforcement Learning with Verifiable Rewards (RLVR) has emerged as a powerful paradigm to improve Large Language Models on reasoning tasks such as coding, math or logic. To assess the reasoning boundary (the fraction of problems a model can solve) researchers often report Pass@k at large sampling budgets. Recent results reveal a crossover phenomenon: while RLVR models outperform the base model at small k values, the base model usually outperforms them when sampling a very large number of completions. This has been interpreted as evidence that base models have a larger reasoning boundary. We argue that on tasks with discrete answer spaces, such as math with numeric outputs, Pass@k at large k reflects the increasingly higher chance of success in the limit of the number of trials rather than genuine reasoning, and can therefore be misleading. We propose Cover@tau, which measures the fraction of problems that a model can solve for which at least a tau proportion of completions are correct. Unlike Pass@k, Cover@tau captures reasoning under an explicit reliability threshold: models that rely on random guessing degrade rapidly as tau increases. We evaluate several RLVR models using Cover@tau-based metrics and illustrate how the relative rankings of popular algorithms change compared to Pass@1, offering a different perspective on reasoning boundaries.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08284v1" target="_blank" rel="noopener noreferrer">
                大语言模型中文化理解的神经元级分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Details: hidden while the card carries collapsed-level-1 (see the page's inline style rules).
         English passages are marked lang="en" because the document itself is declared zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Neuron-Level Analysis of Cultural Understanding in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Taisei Yamamoto, Ryoma Kumon, Danushka Bollegala, Hitomi Yanaka
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM内部机制分析和文化理解这一特定NLP能力，属于纯粹的LLM内部机制研究。虽然涉及LLM技术，但缺乏明确的推荐系统、搜索或广告应用场景，更偏向于模型可解释性和NLP能力分析，而非能够直接应用于业务场景的使能技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:35:00">2025-10-09 14:35:00</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08284v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08284v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As large language models (LLMs) are increasingly deployed worldwide, ensuring their fair and comprehensive cultural understanding is important. However, LLMs exhibit cultural bias and limited awareness of underrepresented cultures, while the mechanisms underlying their cultural understanding remain underexplored. To fill this gap, we conduct a neuron-level analysis to identify neurons that drive cultural behavior, introducing a gradient-based scoring method with additional filtering for precise refinement. We identify both culture-general neurons contributing to cultural understanding regardless of cultures, and culture-specific neurons tied to an individual culture. These neurons account for less than 1% of all neurons and are concentrated in shallow to middle MLP layers. We validate their role by showing that suppressing them substantially degrades performance on cultural benchmarks (by up to 30%), while performance on general natural language understanding (NLU) benchmarks remains largely unaffected. Moreover, we show that culture-specific neurons support knowledge of not only the target culture, but also related cultures. Finally, we demonstrate that training on NLU benchmarks can diminish models' cultural understanding when we update modules containing many culture-general neurons. These findings provide insights into the internal mechanisms of LLMs and offer practical guidance for model training and engineering. Our code is available at https://github.com/ynklab/CULNIG
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Template for a normal paper card: title link, score badge, authors, recommendation reason, metadata row and collapsible abstract.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08255v1" target="_blank" rel="noopener noreferrer">
                LLM智能体中的对手塑造
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Opponent Shaping in LLM Agents
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Marta Emili Garcia Segura, Stephen Hailes, Mirco Musolesi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM智能体在多智能体环境中的对手建模和策略塑造，属于多智能体强化学习范畴。虽然涉及LLM技术，但缺乏与推荐系统、搜索或广告领域的直接关联，且对手塑造概念在这些商业应用场景中并不常见。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:13:24">2025-10-09 14:13:24</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08255v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08255v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.MA</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) are increasingly being deployed as autonomous agents in real-world environments. As these deployments scale, multi-agent interactions become inevitable, making it essential to understand strategic behavior in such systems. A central open question is whether LLM agents, like reinforcement learning agents, can shape the learning dynamics and influence the behavior of others through interaction alone. In this paper, we present the first investigation of opponent shaping (OS) with LLM-based agents. Existing OS algorithms cannot be directly applied to LLMs, as they require higher-order derivatives, face scalability constraints, or depend on architectural components that are absent in transformers. To address this gap, we introduce ShapeLLM, an adaptation of model-free OS methods tailored for transformer-based agents. Using ShapeLLM, we examine whether LLM agents can influence co-players' learning dynamics across diverse game-theoretic environments. We demonstrate that LLM agents can successfully guide opponents toward exploitable equilibria in competitive games (Iterated Prisoner's Dilemma, Matching Pennies, and Chicken) and promote coordination and improve collective welfare in cooperative games (Iterated Stag Hunt and a cooperative version of the Prisoner's Dilemma). Our findings show that LLM agents can both shape and be shaped through interaction, establishing opponent shaping as a key dimension of multi-agent LLM research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08224v1" target="_blank" rel="noopener noreferrer">
                探究文本因果提取中的反主张研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Investigating Counterclaims in Causality Extraction from Text
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tim Hagen, Niklas Deckers, Felix Wolter, Harrisen Scells, Martin Potthast
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本中的因果提取和反主张分析，这属于通用NLP信息抽取任务，与推荐系统、搜索或广告的核心技术没有直接关联。虽然因果推理在理论上可能对某些推荐场景有启发，但论文本身没有展示明确的RecSys/Search/Ads应用潜力，且更偏向纯粹的NLP研究范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:45:54">2025-10-09 13:45:54</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08224v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08224v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Research on causality extraction from text has so far almost entirely neglected counterclaims. Existing causality extraction datasets focus solely on "procausal" claims, i.e., statements that support a relationship. "Concausal" claims, i.e., statements that refute a relationship, are entirely ignored or even accidentally annotated as procausal. We address this shortcoming by developing a new dataset that integrates concausality. Based on an extensive literature review, we first show that concausality is an integral part of causal reasoning on incomplete knowledge. We operationalize this theory in the form of a rigorous guideline for annotation and then augment the Causal News Corpus with concausal statements, obtaining a substantial inter-annotator agreement of Cohen's $\kappa=0.74$. To demonstrate the importance of integrating concausal statements, we show that models trained without concausal relationships tend to misclassify these as procausal instead. Based on our new dataset, this mistake can be mitigated, enabling transformers to effectively distinguish pro- and concausality.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08211v1" target="_blank" rel="noopener noreferrer">
                大语言模型无意中学会欺骗：从错位样本到有偏见的人机交互中出现的诚实度错位
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LLMs Learn to Deceive Unintentionally: Emergent Misalignment in Dishonesty from Misaligned Samples to Biased Human-AI Interactions
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>XuHao Hu, Peng Wang, Xiaoya Lu, Dongrui Liu, Xuanjing Huang, Jing Shao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究LLMs的欺骗行为和错位问题，这属于模型安全性和对齐范畴，而非推荐系统、搜索或广告的核心技术进展。虽然涉及人机交互，但焦点是伦理和安全性问题，这些被明确列为不相关主题。该研究没有展示在推荐、搜索或广告中的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:35:19">2025-10-09 13:35:19</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08211v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08211v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Previous research has shown that LLMs finetuned on malicious or incorrect completions within narrow domains (e.g., insecure code or incorrect medical advice) can become broadly misaligned to exhibit harmful behaviors, which is called emergent misalignment. In this work, we investigate whether this phenomenon can extend beyond safety behaviors to a broader spectrum of dishonesty and deception under high-stakes scenarios (e.g., lying under pressure and deceptive behavior). To explore this, we finetune open-sourced LLMs on misaligned completions across diverse domains. Experimental results demonstrate that LLMs show broadly misaligned behavior in dishonesty. Additionally, we further explore this phenomenon in a downstream combined finetuning setting, and find that introducing as little as 1% of misalignment data into a standard downstream task is sufficient to decrease honest behavior over 20%. Furthermore, we consider a more practical human-AI interaction environment where we simulate both benign and biased users to interact with the assistant LLM. Notably, we find that the assistant can be misaligned unintentionally to exacerbate its dishonesty with only 10% biased user population. In summary, we extend the study of emergent misalignment to the domain of dishonesty and deception under high-stakes scenarios, and demonstrate that this risk arises not only through direct finetuning, but also in downstream mixture tasks and practical human-AI interactions.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08173v1" target="_blank" rel="noopener noreferrer">
                NavSpace：导航代理如何遵循空间智能指令
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            NavSpace: How Navigation Agents Follow Spatial Intelligence Instructions
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haolin Yang, Yuxing Long, Zhuoyuan Yu, Zihan Yang, Minghan Wang, Jiapeng Xu, Yih...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究导航代理和空间智能指令，属于机器人导航领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然涉及智能代理技术，但缺乏明确的RecSys/Search/Ads应用场景，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T12:59:19">2025-10-09 12:59:19</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08173v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08173v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Instruction-following navigation is a key step toward embodied intelligence. Prior benchmarks mainly focus on semantic understanding but overlook systematically evaluating navigation agents' spatial perception and reasoning capabilities. In this work, we introduce the NavSpace benchmark, which contains six task categories and 1,228 trajectory-instruction pairs designed to probe the spatial intelligence of navigation agents. On this benchmark, we comprehensively evaluate 22 navigation agents, including state-of-the-art navigation models and multimodal large language models. The evaluation results lift the veil on spatial intelligence in embodied navigation. Furthermore, we propose SNav, a new spatially intelligent navigation model. SNav outperforms existing navigation agents on NavSpace and real robot tests, establishing a strong baseline for future work.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08120v1" target="_blank" rel="noopener noreferrer">
                通过可验证的全局解释解读LLM作为评判者的策略
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Interpreting LLM-as-a-Judge Policies via Verifiable Global Explanations
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jasmina Gajcin, Erik Miehling, Rahul Nair, Elizabeth Daly, Radu Marinescu, Seshu...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM作为评判者的策略解释和可验证性，这属于纯粹的LLM评估和解释性研究，与幻觉、评估基准等NLP中心主题密切相关。虽然LLM评估在广义上可能影响推荐系统，但论文标题没有表明任何直接的RecSys/Search/Ads应用，而是专注于纯粹的LLM评判机制解释。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T12:05:37">2025-10-09 12:05:37</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08120v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08120v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Using LLMs to evaluate text, that is, LLM-as-a-judge, is increasingly being used at scale to augment or even replace human annotations. As such, it is imperative that we understand the potential biases and risks of doing so. In this work, we propose an approach for extracting high-level concept-based global policies from LLM-as-a-Judge. Our approach consists of two algorithms: 1) CLoVE (Contrastive Local Verifiable Explanations), which generates verifiable, concept-based, contrastive local explanations and 2) GloVE (Global Verifiable Explanations), which uses iterative clustering, summarization and verification to condense local rules into a global policy. We evaluate GloVE on seven standard benchmarking datasets for content harm detection. We find that the extracted global policies are highly faithful to decisions of the LLM-as-a-Judge. Additionally, we evaluated the robustness of global policies to text perturbations and adversarial attacks. Finally, we conducted a user study to evaluate user understanding and satisfaction with global policies.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08111v1" target="_blank" rel="noopener noreferrer">
                评估大语言模型生成的法律解释在社交媒体网红营销监管合规性中的应用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Evaluating LLM-Generated Legal Explanations for Regulatory Compliance in Social Media Influencer Marketing
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haoyang Gui, Thales Bertaglia, Taylor Annabell, Catalina Goanta, Tjomme Dooper, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM在法律解释和监管合规性评估方面的应用，这属于特定领域应用而非核心推荐系统、搜索或广告技术。虽然涉及社交媒体环境，但焦点是法律合规性而非排名、检索或用户建模等核心领域，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:50:37">2025-10-09 11:50:37</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08111v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08111v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CY</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rise of influencer marketing has blurred boundaries between organic content and sponsored content, making the enforcement of legal rules relating to transparency challenging. Effective regulation requires applying legal knowledge with a clear purpose and reason, yet current detection methods of undisclosed sponsored content generally lack legal grounding or operate as opaque "black boxes". Using 1,143 Instagram posts, we compare gpt-5-nano and gemini-2.5-flash-lite under three prompting strategies with controlled levels of legal knowledge provided. Both models perform strongly in classifying content as sponsored or not (F1 up to 0.93), though performance drops by over 10 points on ambiguous cases. We further develop a taxonomy of reasoning errors, showing frequent citation omissions (28.57%), unclear references (20.71%), and hidden ads exhibiting the highest miscue rate (28.57%). While adding regulatory text to the prompt improves explanation quality, it does not consistently improve detection accuracy. The contribution of this paper is threefold. First, it makes a novel addition to regulatory compliance technology by providing a taxonomy of common errors in LLM-generated legal reasoning to evaluate whether automated moderation is not only accurate but also legally robust, thereby advancing the transparent detection of influencer marketing content. Second, it features an original dataset of LLM explanations annotated by two students who were trained in influencer marketing law. Third, it combines quantitative and qualitative evaluation strategies for LLM explanations and critically reflects on how these findings can support advertising regulatory bodies in automating moderation processes on a solid legal foundation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08098v1" target="_blank" rel="noopener noreferrer">
                思维的成本：大型语言模型中推理、性能与协商成本的多语言分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sherzod Hakimov, Roland Bernard, Tim Leiber, Karl Osswald, Kristina Richert, Rui...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要分析LLM的推理能力、性能表现和成本协商，属于纯粹的LLM能力评估研究。虽然涉及多语言分析，但核心焦点是模型内在能力评估而非RecSys/Search/Ads应用。论文缺乏对推荐系统、搜索或广告场景的具体应用潜力说明，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:36:38">2025-10-09 11:36:38</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08098v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08098v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Negotiation is a fundamental challenge for AI agents, as it requires an ability to reason strategically, model opponents, and balance cooperation with competition. We conduct the first comprehensive study systematically evaluating the effect of (LLM-)reasoning on the negotiation abilities of both commercial and open-weight LLMs, and do this across three languages. Using a self-play setup across three diverse dialogue games, we analyse trade-offs between performance and cost, the language consistency of reasoning processes, and the nature of strategic adaptation exhibited by models. Our findings show that enabling reasoning-that is, scaling test time compute-significantly improves negotiation outcomes by enhancing collaboration and helping models overcome task complexities, but comes at a substantial computational cost: reasoning improves GPT-5's performance by 31.4 % while increasing its cost by nearly 400 %. Most critically, we uncover a significant multilingual reasoning distinction: open-weight models consistently switch to English for their internal reasoning steps, even when negotiating in German or Italian (and thus possibly impacting potential explainability gains through the disclosure of reasoning traces), while leading commercial models maintain language consistency between their reasoning and final output.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title (links to alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08091v1" target="_blank" rel="noopener noreferrer">
                万物皆合理：探究大语言模型推理对人类合理性认知的影响
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (rule in the page's inline style block). -->
    <div class="paper-details">
        <!-- English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Everything is Plausible: Investigating the Impact of LLM Rationales on Human Notions of Plausibility
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shramay Palta, Peter Rankel, Sarah Wiegreffe, Rachel Rudinger
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究LLM推理对人类认知的影响，属于心理学和人类认知交互领域。虽然涉及LLM，但其研究重点在于人类对合理性的感知判断，而非LLM在推荐系统、搜索或广告中的技术应用或架构改进。该研究缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:22:29">2025-10-09 11:22:29</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08091v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08091v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.HC</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We investigate the degree to which human plausibility judgments of multiple-choice commonsense benchmark answers are subject to influence by (im)plausibility arguments for or against an answer, in particular, using rationales generated by LLMs. We collect 3,000 plausibility judgments from humans and another 13,600 judgments from LLMs. Overall, we observe increases and decreases in mean human plausibility ratings in the presence of LLM-generated PRO and CON rationales, respectively, suggesting that, on the whole, human judges find these rationales convincing. Experiments with LLMs reveal similar patterns of influence. Our findings demonstrate a novel use of LLMs for studying aspects of human cognition, while also raising practical concerns that, even in domains where humans are ``experts'' (i.e., common sense), LLMs have the potential to exert considerable influence on people's beliefs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08049v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08049v1" target="_blank" rel="noopener noreferrer">
                过程奖励模型综述：从结果信号到大型语言模型的过程监督
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            A Survey of Process Reward Models: From Outcome Signals to Process Supervisions for Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Congming Zheng, Jiachen Zhu, Zhuoying Ou, Yuxiang Chen, Kangning Zhang, Rong Sha...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的训练方法和奖励建模，属于纯粹的LLM技术范畴。虽然奖励模型在RLHF中有应用，但论文聚焦于过程监督而非具体应用场景，缺乏与推荐系统、搜索或广告领域的直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T10:35:31">2025-10-09 10:35:31</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08049v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08049v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Although Large Language Models (LLMs) exhibit advanced reasoning ability, conventional alignment remains largely dominated by outcome reward models (ORMs) that judge only final answers. Process Reward Models (PRMs) address this gap by evaluating and guiding reasoning at the step or trajectory level. This survey provides a systematic overview of PRMs through the full loop: how to generate process data, build PRMs, and use PRMs for test-time scaling and reinforcement learning. We summarize applications across math, code, text, multimodal reasoning, robotics, and agents, and review emerging benchmarks. Our goal is to clarify design spaces, reveal open challenges, and guide future research toward fine-grained, robust reasoning alignment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07993v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07993v1" target="_blank" rel="noopener noreferrer">
                利用作者特定上下文进行科学图表标题生成：第三届SciCap挑战赛
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Leveraging Author-Specific Context for Scientific Figure Caption Generation: 3rd SciCap Challenge
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Watcharapong Timklaypachara, Monrada Chiewhawan, Nopporn Lekuthai, Titipat Achak...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于科学图表标题生成这一特定领域的文本生成任务，属于纯粹的LLM内容生成应用。虽然标题生成在技术上与搜索中的文档理解有一定关联，但该工作明确限定在科学图表这一狭窄领域，且没有展示出在推荐系统、搜索或广告中的直接应用潜力，因此与您关注的领域相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T09:30:28">2025-10-09 09:30:28</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07993v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07993v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (LaTeX escapes from the feed are rendered as plain text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Scientific figure captions require both accuracy and stylistic consistency to convey visual information. Here, we present a domain-specific caption generation system for the 3rd SciCap Challenge that integrates figure-related textual context with author-specific writing styles using the LaMP-Cap dataset. Our approach uses a two-stage pipeline: Stage 1 combines context filtering, category-specific prompt optimization via DSPy's MIPROv2 and SIMBA, and caption candidate selection; Stage 2 applies few-shot prompting with profile figures for stylistic refinement. Our experiments demonstrate that category-specific prompts outperform both zero-shot and general optimized approaches, improving ROUGE-1 recall by +8.3% while limiting precision loss to -2.8% and BLEU-4 reduction to -10.9%. Profile-informed stylistic refinement yields 40–48% gains in BLEU scores and 25–27% in ROUGE. Overall, our system demonstrates that combining contextual understanding with author-specific stylistic adaptation can generate captions that are both scientifically accurate and stylistically faithful to the source paper.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07974v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07974v1" target="_blank" rel="noopener noreferrer">
                大型语言模型中的主动混淆表达：利用世界模型实现更好的社会推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Active Confusion Expression in Large Language Models: Leveraging World Models toward Better Social Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jialu Du, Guiyang Hou, Yihui Fu, Chen Wu, Wenqi Zhang, Yongliang Shen, Weiming L...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的社会推理能力和世界模型，这属于纯粹的NLP中心话题，与推荐系统、搜索或广告的核心技术进展无关。虽然提到了世界模型，但没有明确说明其在RecSys/Search/Ads中的潜在应用价值，因此相关性很低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T09:07:31">2025-10-09 09:07:31</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07974v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07974v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                While large language models (LLMs) excel in mathematical and code reasoning, we observe they struggle with social reasoning tasks, exhibiting cognitive confusion, logical inconsistencies, and conflation between objective world states and subjective belief states. Through detailed analysis of DeepSeek-R1's reasoning trajectories, we find that LLMs frequently encounter reasoning impasses and tend to output contradictory terms like "tricky" and "confused" when processing scenarios with multiple participants and timelines, leading to erroneous reasoning or infinite loops. The core issue is their inability to disentangle objective reality from agents' subjective beliefs. To address this, we propose an adaptive world model-enhanced reasoning mechanism that constructs a dynamic textual world model to track entity states and temporal sequences. It dynamically monitors reasoning trajectories for confusion indicators and promptly intervenes by providing clear world state descriptions, helping models navigate through cognitive dilemmas. The mechanism mimics how humans use implicit world models to distinguish between external events and internal beliefs. Evaluations on three social benchmarks demonstrate significant improvements in accuracy (e.g., +10% in Hi-ToM) while reducing computational costs (up to 33.8% token reduction), offering a simple yet effective solution for deploying LLMs in social contexts.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07958v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07958v1" target="_blank" rel="noopener noreferrer">
                A²Search：基于强化学习的歧义感知问答
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <!-- Raw LaTeX from the feed is written as plain text, matching the "A²Search" form used in the header link. -->
        <div class="mb-2 text-base text-gray-700">
            A²Search: Ambiguity-Aware Question Answering with Reinforcement Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fengji Zhang, Xinyao Niu, Chengyang Ying, Guancheng Lin, Zhongkai Hao, Zhou Fan,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及搜索领域，但主要关注问答任务中的歧义处理和强化学习应用，这属于纯粹的NLP问答范畴。强化学习部分没有明确展示与推荐系统、搜索排序或广告相关的具体应用场景，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T08:53:31">2025-10-09 08:53:31</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07958v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07958v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (LaTeX math from the feed is rendered as plain text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recent advances in Large Language Models (LLMs) and Reinforcement Learning (RL) have led to strong performance in open-domain question answering (QA). However, existing models still struggle with questions that admit multiple valid answers. Standard QA benchmarks, which typically assume a single gold answer, overlook this reality and thus produce inappropriate training signals. Existing attempts to handle ambiguity often rely on costly manual annotation, which is difficult to scale to multi-hop datasets such as HotpotQA and MuSiQue. In this paper, we present A²Search, an annotation-free, end-to-end training framework to recognize and handle ambiguity. At its core is an automated pipeline that detects ambiguous questions and gathers alternative answers via trajectory sampling and evidence verification. The model is then optimized with RL using a carefully designed AnsF1 reward, which naturally accommodates multiple answers. Experiments on eight open-domain QA benchmarks demonstrate that A²Search achieves new state-of-the-art performance. With only a single rollout, A²Search-7B yields an average AnsF1@1 score of 48.4% across four multi-hop benchmarks, outperforming all strong baselines, including the substantially larger ReSearch-32B (46.2%). Extensive analyses further show that A²Search resolves ambiguity and generalizes across benchmarks, highlighting that embracing ambiguity is essential for building more reliable QA systems. Our code, data, and model weights can be found at https://github.com/zfj1998/A2Search
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07926v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07926v1" target="_blank" rel="noopener noreferrer">
                文本生成中事实召回自动评估的全面性度量
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Comprehensiveness Metrics for Automatic Evaluation of Factual Recall in Text Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Adam Dejl, James Barry, Alessandra Pascale, Javier Carnerero Cano
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注文本生成中事实召回的评估指标，属于纯粹的NLP评估基准范畴。虽然评估指标在理论上可能对搜索系统中的事实准确性有间接参考价值，但该研究缺乏与推荐系统、广告或搜索排名的直接关联，且不属于核心领域进展或使能技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T08:22:24">2025-10-09 08:22:24</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07926v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07926v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">I.2.7</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Despite demonstrating remarkable performance across a wide range of tasks, large language models (LLMs) have also been found to frequently produce outputs that are incomplete or selectively omit key information. In sensitive domains, such omissions can result in significant harm comparable to that posed by factual inaccuracies, including hallucinations. In this study, we address the challenge of evaluating the comprehensiveness of LLM-generated texts, focusing on the detection of missing information or underrepresented viewpoints. We investigate three automated evaluation strategies: (1) an NLI-based method that decomposes texts into atomic statements and uses natural language inference (NLI) to identify missing links, (2) a Q&A-based approach that extracts question-answer pairs and compares responses across sources, and (3) an end-to-end method that directly identifies missing content using LLMs. Our experiments demonstrate the surprising effectiveness of the simple end-to-end approach compared to more complex methods, though at the cost of reduced robustness, interpretability and result granularity. We further assess the comprehensiveness of responses from several popular open-weight LLMs when answering user queries based on multiple sources.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07912v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07912v1" target="_blank" rel="noopener noreferrer">
                迈向类人评分：用于主观问题评估的统一LLM增强框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Towards Human-Like Grading: A Unified LLM-Enhanced Framework for Subjective Question Evaluation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fanwei Zhua, Jiaxuan He, Xiaoxiao Chen, Zulong Chen, Quan Lu, Chenrui Mei
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注教育领域的自动评分系统，属于LLM在特定垂直领域的应用。虽然涉及LLM技术，但其核心应用场景（教育评分）与推荐系统、搜索或广告领域没有直接关联，也不涉及这些领域的特定挑战或数据模态。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T08:05:39">2025-10-09 08:05:39</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07912v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07912v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Automatic grading of subjective questions remains a significant challenge in examination assessment due to the diversity in question formats and the open-ended nature of student responses. Existing works primarily focus on a specific type of subjective question and lack the generality to support comprehensive exams that contain diverse question types. In this paper, we propose a unified Large Language Model (LLM)-enhanced auto-grading framework that provides human-like evaluation for all types of subjective questions across various domains. Our framework integrates four complementary modules to holistically evaluate student answers. In addition to a basic text matching module that provides a foundational assessment of content similarity, we leverage the powerful reasoning and generative capabilities of LLMs to: (1) compare key knowledge points extracted from both student and reference answers, (2) generate a pseudo-question from the student answer to assess its relevance to the original question, and (3) simulate human evaluation by identifying content-related and non-content strengths and weaknesses. Extensive experiments on both general-purpose and domain-specific datasets show that our framework consistently outperforms traditional and LLM-based baselines across multiple grading metrics. Moreover, the proposed system has been successfully deployed in real-world training and certification exams at a major e-commerce enterprise.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07896v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. "Attribution" is rendered as 归因 (not 属性, "attribute"). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07896v1" target="_blank" rel="noopener noreferrer">
                ACE：面向多跳事实回忆的归因控制知识编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ACE: Attribution-Controlled Knowledge Editing for Multi-hop Factual Recall
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiayu Yang, Yuxuan Fan, Songning Lai, Shengen Wu, Jiaqi Tang, Chun Kang, Zhijian...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注知识编辑和事实回忆，属于LLM内部知识管理范畴，与推荐系统、搜索或广告的核心技术关联较弱。虽然知识编辑技术理论上可能用于更新推荐系统中的实体知识，但论文标题明确聚焦于多跳事实回忆这一特定NLP任务，缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T07:46:08">2025-10-09 07:46:08</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07896v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07896v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large Language Models (LLMs) require efficient knowledge editing (KE) to update factual information, yet existing methods exhibit significant performance decay in multi-hop factual recall. This failure is particularly acute when edits involve intermediate implicit subjects within reasoning chains. Through causal analysis, we reveal that this limitation stems from an oversight of how chained knowledge is dynamically represented and utilized at the neuron level. We discover that during multi-hop reasoning, implicit subjects function as query neurons, which sequentially activate corresponding value neurons across transformer layers to accumulate information toward the final answer, a dynamic prior KE work has overlooked. Guided by this insight, we propose ACE: Attribution-Controlled Knowledge Editing for Multi-hop Factual Recall, a framework that leverages neuron-level attribution to identify and edit these critical query-value (Q-V) pathways. ACE provides a mechanistically grounded solution for multi-hop KE, empirically outperforming state-of-the-art methods by 9.44% on GPT-J and 37.46% on Qwen3-8B. Our analysis further reveals more fine-grained activation patterns in Qwen3 and demonstrates that the semantic interpretability of value neurons is orchestrated by query-driven accumulation. These findings establish a new pathway for advancing KE capabilities based on the principled understanding of internal reasoning mechanisms.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.07892v1 (score 2/10). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Always-visible header row: title link and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07892v1" target="_blank" rel="noopener noreferrer">
                指标计算基准：面向大型语言模型的代码可验证复杂指令遵循基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (the page stylesheet sets `.collapsed-level-1 .paper-details { display: none }`). -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Metric Calculating Benchmark: Code-Verifiable Complicate Instruction Following Benchmark for Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hyeonseok Moon, Seongtae Hong, Jaehyung Seo, Heuiseok Lim
        </div>

        <!-- The rationale addresses the reader in second person, consistent with the other cards. -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于LLM的基准测试和评估方法，属于纯粹的评估基准研究，与您的核心关注点（推荐系统、搜索广告领域的核心进展、使能技术及应用）无关。虽然涉及复杂指令遵循，但主要关注代码验证和基准构建，没有明确的推荐/搜索/广告应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T07:43:15">2025-10-09 07:43:15</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07892v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07892v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recent frontier-level LLMs have saturated many previously difficult benchmarks, leaving little room for further differentiation. This progress highlights the need for challenging benchmarks that provide objective verification. In this paper, we introduce MCBench, a benchmark designed to evaluate whether LLMs can execute string-matching NLP metrics by strictly following step-by-step instructions. Unlike prior benchmarks that depend on subjective judgments or general reasoning, MCBench offers an objective, deterministic and code-verifiable evaluation. This setup allows us to systematically test whether LLMs can maintain accurate step-by-step execution, including instruction adherence, numerical computation, and long-range consistency in handling intermediate results. To ensure objective evaluation of these abilities, we provide a parallel reference code that can evaluate the accuracy of LLM output. We provide three evaluative metrics and three benchmark variants designed to measure the detailed instruction understanding capability of LLMs. Our analyses show that MCBench serves as an effective and objective tool for evaluating the capabilities of cutting-edge LLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07890v1" target="_blank" rel="noopener noreferrer">
                标准语到方言的迁移趋势在文本和语音中存在差异：德语方言意图与主题分类案例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Standard-to-Dialect Transfer Trends Differ across Text and Speech: A Case Study on Intent and Topic Classification in German Dialects
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Verena Blaschke, Miriam Winkler, Barbara Plank
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究方言处理中的迁移学习差异，属于特定语言处理领域。虽然意图分类在搜索和推荐中有应用，但论文聚焦于德语方言这一狭窄领域，且未涉及LLM、Transformer架构或推荐系统的核心进展，与当前关注点的直接关联性较弱。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 07:43:08
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07890v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07890v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Research on cross-dialectal transfer from a standard to a non-standard dialect variety has typically focused on text data. However, dialects are primarily spoken, and non-standard spellings are known to cause issues in text processing. We compare standard-to-dialect transfer in three settings: text models, speech models, and cascaded systems where speech first gets automatically transcribed and then further processed by a text model. In our experiments, we focus on German and multiple German dialects in the context of written and spoken intent and topic classification. To that end, we release the first dialectal audio intent classification dataset. We find that the speech-only setup provides the best results on the dialect data while the text-only setup works best on the standard data. While the cascaded systems lag behind the text-only models for German, they perform relatively well on the dialectal data if the transcription system generates normalized, standard-like output.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07881v1" target="_blank" rel="noopener noreferrer">
                CS3-Bench：评估和增强普通话-英语语码转换的语音到语音大语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CS3-Bench: Evaluating and Enhancing Speech-to-Speech LLMs for Mandarin-English Code-Switching
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Heyang Liu, Yuhao Wang, Ziyang Cheng, Ronghua Wu, Qunshan Gu, Yanfeng Wang, Yu W...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语音到语音的语码转换技术，属于语音处理领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然涉及LLM技术，但其应用场景（语音处理、语码转换）在RecSys/Search/Ads中缺乏明确的潜在应用价值。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 07:34:23
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07881v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07881v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The advancement of multimodal large language models has accelerated the development of speech-to-speech interaction systems. While natural monolingual interaction has been achieved, we find existing models exhibit deficiencies in language alignment. In our proposed Code-Switching Speech-to-Speech Benchmark (CS3-Bench), experiments on 7 mainstream models demonstrate a relative performance drop of up to 66% in knowledge-intensive question answering and varying degrees of misunderstanding in open-ended conversations. Starting from a model with severe performance deterioration, we propose both data constructions and training approaches to improve the language alignment capabilities, specifically employing Chain of Recognition (CoR) to enhance understanding and Keyword Highlighting (KH) to guide generation. Our approach improves the knowledge accuracy from 25.14% to 46.13%, with open-ended understanding rate from 64.5% to 86.5%, and significantly reduces pronunciation errors in the secondary language. CS3-Bench is available at https://huggingface.co/datasets/VocalNet/CS3-Bench.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07880v1" target="_blank" rel="noopener noreferrer">
                LLM真的需要10+次思考来“找出1000天后的时间”吗？——迈向对LLM过度思考的结构化理解
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Do LLMs Really Need 10+ Thoughts for "Find the Time 1000 Days Later"? Towards Structural Understanding of LLM Overthinking
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinliang Frederick Zhang, Anhad Mohananey, Alexandra Chronopoulou, Pinelopi Papa...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究LLM的推理过程和过度思考现象，这属于LLM内部工作机制分析。虽然涉及LLM技术，但焦点是推理效率而非在推荐系统、搜索或广告中的具体应用。论文没有展示明确的RecSys/Search/Ads应用潜力，因此相关性较低。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 07:33:25
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07880v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07880v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Models employing long chain-of-thought (CoT) reasoning have shown superior performance on complex reasoning tasks. Yet, this capability introduces a critical and often overlooked inefficiency -- overthinking -- models often engage in unnecessarily extensive reasoning even for simple queries, incurring significant computations without accuracy improvements. While prior work has explored solutions to mitigate overthinking, a fundamental gap remains in our understanding of its underlying causes. Most existing analyses are limited to superficial, profiling-based observations, failing to delve into LLMs' inner workings. This study introduces a systematic, fine-grained analyzer of LLMs' thought process to bridge the gap, TRACE. We first benchmark the overthinking issue, confirming that long-thinking models are five to twenty times slower on simple tasks with no substantial gains. We then use TRACE to first decompose the thought process into minimally complete sub-thoughts. Next, by inferring discourse relationships among sub-thoughts, we construct granular thought progression graphs and subsequently identify common thinking patterns for topically similar queries. Our analysis reveals two major patterns for open-weight thinking models -- Explorer and Late Landing. This finding provides evidence that over-verification and over-exploration are the primary drivers of overthinking in LLMs. Grounded in thought structures, we propose a utility-based definition of overthinking, which moves beyond length-based metrics. This revised definition offers a more insightful understanding of LLMs' thought progression, as well as practical guidelines for principled overthinking management.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07877v1" target="_blank" rel="noopener noreferrer">
                准备翻译而非表征？多语言大语言模型在语言家族和领域间的偏见与性能差距
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Ready to Translate, Not to Represent? Bias and Performance Gaps in Multilingual LLMs Across Language Families and Domains
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Md. Faiyaz Abdullah Sayeedi, Md. Mahbub Alam, Subhey Sadi Rahman, Md. Adnanul Is...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多语言LLM的偏见和性能差距问题，这属于纯粹的NLP评估和基准测试范畴。虽然多语言能力在理论上可能对国际化推荐/搜索系统有影响，但论文焦点是语言家族间的性能差异和翻译偏差，而非直接应用于推荐系统、搜索或广告的技术进步。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 07:28:30
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07877v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07877v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rise of Large Language Models (LLMs) has redefined Machine Translation (MT), enabling context-aware and fluent translations across hundreds of languages and textual domains. Despite their remarkable capabilities, LLMs often exhibit uneven performance across language families and specialized domains. Moreover, recent evidence reveals that these models can encode and amplify different biases present in their training data, posing serious concerns for fairness, especially in low-resource languages. To address these gaps, we introduce Translation Tangles, a unified framework and dataset for evaluating the translation quality and fairness of open-source LLMs. Our approach benchmarks 24 bidirectional language pairs across multiple domains using different metrics. We further propose a hybrid bias detection pipeline that integrates rule-based heuristics, semantic similarity filtering, and LLM-based validation. We also introduce a high-quality, bias-annotated dataset based on human evaluations of 1,439 translation-reference pairs. The code and dataset are accessible on GitHub: https://github.com/faiyazabdullah/TranslationTangles
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07774v1" target="_blank" rel="noopener noreferrer">
                使用评分标准奖励治愈LLM数学推理中的奇迹步骤
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Curing Miracle Steps in LLM Mathematical Reasoning with Rubric Rewards
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Youliang Yuan, Qiuyang Mang, Jingbang Chen, Hong Wan, Xiaoyuan Liu, Junjielong X...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM在数学推理任务中的特定问题（奇迹步骤）和训练方法（评分标准奖励），这属于纯粹的NLP推理改进范畴。虽然涉及LLM训练技术，但缺乏明确的与推荐系统、搜索或广告相关的潜在应用场景，更像是通用NLP能力的优化。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 04:30:45
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07774v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07774v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models for mathematical reasoning are typically trained with outcome-based rewards, which credit only the final answer. In our experiments, we observe that this paradigm is highly susceptible to reward hacking, leading to a substantial overestimation of a model's reasoning ability. This is evidenced by a high incidence of false positives - solutions that reach the correct final answer through an unsound reasoning process. Through a systematic analysis with human verification, we establish a taxonomy of these failure modes, identifying patterns like Miracle Steps - abrupt jumps to a correct output without a valid preceding derivation. Probing experiments suggest a strong association between these Miracle Steps and memorization, where the model appears to recall the answer directly rather than deriving it. To mitigate this systemic issue, we introduce the Rubric Reward Model (RRM), a process-oriented reward function that evaluates the entire reasoning trajectory against problem-specific rubrics. The generative RRM provides fine-grained, calibrated rewards (0-1) that explicitly penalize logical flaws and encourage rigorous deduction. When integrated into a reinforcement learning pipeline, RRM-based training consistently outperforms outcome-only supervision across four math benchmarks. Notably, it boosts Verified Pass@1024 on AIME2024 from 26.7% to 62.6% and reduces the incidence of Miracle Steps by 71%. Our work demonstrates that rewarding the solution process is crucial for building models that are not only more accurate but also more reliable.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07743v1" target="_blank" rel="noopener noreferrer">
                OpenRubrics：面向奖励建模和大语言模型对齐的可扩展合成评分标准生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            OpenRubrics: Towards Scalable Synthetic Rubric Generation for Reward Modeling and LLM Alignment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianci Liu, Ran Xu, Tony Yu, Ilgee Hong, Carl Yang, Tuo Zhao, Haoyu Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM对齐和奖励建模中的评分标准生成，属于纯粹的LLM对齐技术范畴。虽然奖励建模在理论上可以应用于推荐系统的偏好学习，但论文标题明确聚焦于评分标准生成这一特定对齐任务，与推荐/搜索/广告系统的核心排序和匹配问题关联度较低。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 03:31:26
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07743v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07743v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reward modeling lies at the core of reinforcement learning from human feedback (RLHF), yet most existing reward models rely on scalar or pairwise judgments that fail to capture the multifaceted nature of human preferences. Recent studies have explored rubrics-as-rewards (RaR) that uses structured natural language criteria that capture multiple dimensions of response quality. However, producing rubrics that are both reliable and scalable remains a key challenge. In this work, we introduce OpenRubrics, a diverse, large-scale collection of (prompt, rubric) pairs for training rubric-generation and rubric-based reward models. To elicit discriminative and comprehensive evaluation signals, we introduce Contrastive Rubric Generation (CRG), which derives both hard rules (explicit constraints) and principles (implicit qualities) by contrasting preferred and rejected responses. We further improve reliability by enforcing preference-label consistency via rejection sampling to remove noisy rubrics. Across multiple reward-modeling benchmarks, our rubric-based reward model, Rubric-RM, surpasses strong size-matched baselines by 6.8%. These gains transfer to policy models on instruction-following and biomedical benchmarks. Our results show that rubrics provide scalable alignment signals that narrow the gap between costly human evaluation and automated reward modeling, enabling a new principle-driven paradigm for LLM alignment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07737v1" target="_blank" rel="noopener noreferrer">
                ToolExpander：将工具使用强化学习的前沿扩展到弱大型语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ToolExpander: Extending the Frontiers of Tool-Using Reinforcement Learning to Weak LLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fu Chen, Peng Wang, Xiyin Li, Wen Li, Shichi Lei, Dongdong Xiang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注工具使用强化学习与弱LLMs的结合，这属于强化学习的特定应用领域。虽然提到了LLMs，但核心是RL方法而非LLM技术本身，且没有明确说明在推荐系统、搜索或广告中的具体应用潜力。该工作更偏向于通用的RL技术改进，而非直接相关的领域应用。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 03:20:13
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07737v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07737v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Training Large Language Models (LLMs) with Group Relative Policy Optimization (GRPO) encounters a significant challenge: models often fail to produce accurate responses, particularly in small-scale architectures. This limitation not only diminishes performance improvements and undermines the potential of GRPO but also frequently leads to mid-training collapse, adversely affecting stability and final efficacy. To address these issues, we propose ToolExpander, a novel framework that advances tool-oriented reinforcement learning for resource-constrained LLMs through two key innovations:(1) Dynamic Multi-Round Hard Sampling, which dynamically substitutes challenging samples(those without correct outputs over 10 rollouts) with high-quality few-shot demonstrations during training, coupled with an exponential learning rate decay strategy to mitigate oscillations;(2) Self-Exemplifying Thinking, an enhanced GRPO framework that eliminates KL divergence and incorporates adjusted clipping coefficients, encouraging models to autonomously generate and analyze few-shot examples via a minimal additional reward (0.01).Experimental results demonstrate that ToolExpander significantly enhances tool-using capabilities in LLMs, especially in weaker small-scale models, improving both training stability and overall performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge.
         Font Awesome icons in this card are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08575v1" target="_blank" rel="noopener noreferrer">
                ReSplat：学习循环高斯泼溅
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section: the inline stylesheet hides .paper-details while the card has collapsed-level-1. -->
    <div class="paper-details">
        <!-- English title: lang="en" overrides the page-level zh-CN for this element. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ReSplat: Learning Recurrent Gaussian Splats
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haofei Xu, Daniel Barath, Andreas Geiger, Marc Pollefeys
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题涉及计算机视觉中的3D场景表示技术（高斯泼溅），属于纯粹的视觉研究方向。虽然循环学习机制可能具有时序建模能力，但论文标题没有显示与推荐系统、搜索或广告的直接关联，也没有明确表明其技术可以应用于这些领域。</p>
        </div>

        <!-- Metadata row: date/time, link to the arXiv abstract page, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 17:59:59
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08575v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08575v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                While feed-forward Gaussian splatting models provide computational efficiency and effectively handle sparse input settings, their performance is fundamentally limited by the reliance on a single forward pass during inference. We propose ReSplat, a feed-forward recurrent Gaussian splatting model that iteratively refines 3D Gaussians without explicitly computing gradients. Our key insight is that the Gaussian splatting rendering error serves as a rich feedback signal, guiding the recurrent network to learn effective Gaussian updates. This feedback signal naturally adapts to unseen data distributions at test time, enabling robust generalization. To initialize the recurrent process, we introduce a compact reconstruction model that operates in a $16 \times$ subsampled space, producing $16 \times$ fewer Gaussians than previous per-pixel Gaussian models. This substantially reduces computational overhead and allows for efficient Gaussian updates. Extensive experiments across varying of input views (2, 8, 16), resolutions ($256 \times 256$ to $540 \times 960$), and datasets (DL3DV and RealEstate10K) demonstrate that our method achieves state-of-the-art performance while significantly reducing the number of Gaussians and improving the rendering speed. Our project page is at https://haofeixu.github.io/resplat/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08568v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08568v1" target="_blank" rel="noopener noreferrer">
                NovaFlow：通过生成视频中的可操作流实现零样本操控
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            NovaFlow: Zero-Shot Manipulation via Actionable Flow from Generated Videos
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hongyu Li, Lingfeng Sun, Yafei Hu, Duy Ta, Jennifer Barry, George Konidaris, Jia...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频生成和动作操控技术，属于计算机视觉和生成式AI领域。虽然标题提到'零样本'概念，但核心内容涉及视频生成和动作流操控，与推荐系统、搜索或广告的核心技术栈没有直接关联，也没有明确的Transformer架构改进或LLM应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:55">2025-10-09 17:59:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08568v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08568v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Enabling robots to execute novel manipulation tasks zero-shot is a central goal in robotics. Most existing methods assume in-distribution tasks or rely on fine-tuning with embodiment-matched data, limiting transfer across platforms. We present NovaFlow, an autonomous manipulation framework that converts a task description into an actionable plan for a target robot without any demonstrations. Given a task description, NovaFlow synthesizes a video using a video generation model and distills it into 3D actionable object flow using off-the-shelf perception modules. From the object flow, it computes relative poses for rigid objects and realizes them as robot actions via grasp proposals and trajectory optimization. For deformable objects, this flow serves as a tracking objective for model-based planning with a particle-based dynamics model. By decoupling task understanding from low-level control, NovaFlow naturally transfers across embodiments. We validate on rigid, articulated, and deformable object manipulation tasks using a table-top Franka arm and a Spot quadrupedal mobile robot, and achieve effective zero-shot execution without demonstrations or embodiment-specific training. Project website: https://novaflow.lhy.xyz/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08561v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08561v1" target="_blank" rel="noopener noreferrer">
                MultiCOIN：多模态可控视频中间帧生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MultiCOIN: Multi-Modal COntrollable Video INbetweening
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Maham Tanveer, Yang Zhou, Simon Niklaus, Ali Mahdavi Amiri, Hao Zhang, Krishna K...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成领域的中间帧生成技术，属于计算机视觉范畴。虽然涉及多模态控制，但其核心应用场景是视频内容生成和编辑，与推荐系统、搜索或广告的排序和匹配任务没有直接关联。在推荐/搜索/广告领域缺乏明确的应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:27">2025-10-09 17:59:27</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08561v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08561v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. The unexpanded LaTeX macro for the model name in the source abstract is rendered as "MultiCOIN". -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video inbetweening creates smooth and natural transitions between two image frames, making it an indispensable tool for video editing and long-form video synthesis. Existing works in this domain are unable to generate large, complex, or intricate motions. In particular, they cannot accommodate the versatility of user intents and generally lack fine control over the details of intermediate frames, leading to misalignment with the creative mind. To fill these gaps, we introduce MultiCOIN, a video inbetweening framework that allows multi-modal controls, including depth transition and layering, motion trajectories, text prompts, and target regions for movement localization, while achieving a balance between flexibility, ease of use, and precision for fine-grained video interpolation. To achieve this, we adopt the Diffusion Transformer (DiT) architecture as our video generative model, due to its proven capability to generate high-quality long videos. To ensure compatibility between DiT and our multi-modal controls, we map all motion controls into a common sparse and user-friendly point-based representation as the video/noise input. Further, to respect the variety of controls which operate at varying levels of granularity and influence, we separate content controls and motion controls into two branches to encode the required features before guiding the denoising process, resulting in two generators, one for motion and the other for content. Finally, we propose a stage-wise training strategy to ensure that our model learns the multi-modal controls smoothly. Extensive qualitative and quantitative experiments demonstrate that multi-modal controls enable a more dynamic, customizable, and contextually accurate visual narrative.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08559v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08559v1" target="_blank" rel="noopener noreferrer">
                SciVideoBench：大型多模态模型中的科学视频推理基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SciVideoBench: Benchmarking Scientific Video Reasoning in Large Multimodal Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Andong Deng, Taojiannan Yang, Shoubin Yu, Lincoln Spencer, Mohit Bansal, Chen Ch...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于科学视频推理的基准测试，属于纯粹的评估基准范畴，这在无关主题中明确排除。虽然涉及多模态模型，但科学视频领域与推荐系统、搜索或广告没有直接关联，且基准测试本身是评估性质而非技术进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:23">2025-10-09 17:59:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08559v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08559v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Multimodal Models (LMMs) have achieved remarkable progress across various capabilities; however, complex video reasoning in the scientific domain remains a significant and challenging frontier. Current video benchmarks predominantly target general scenarios where perception/recognition is heavily relied on, while with relatively simple reasoning tasks, leading to saturation and thus failing to effectively evaluate advanced multimodal cognitive skills. To address this critical gap, we introduce SciVideoBench, a rigorous benchmark specifically designed to assess advanced video reasoning in scientific contexts. SciVideoBench consists of 1,000 carefully crafted multiple-choice questions derived from cutting-edge scientific experimental videos spanning over 25 specialized academic subjects and verified by a semi-automatic system. Each question demands sophisticated domain-specific knowledge, precise spatiotemporal perception, and intricate logical reasoning, effectively challenging models' higher-order cognitive abilities. Our evaluation highlights significant performance deficits in state-of-the-art proprietary and open-source LMMs, including Gemini 2.5 Pro and Qwen2.5-VL, indicating substantial room for advancement in video reasoning capabilities. Detailed analyses of critical factors such as reasoning complexity and visual grounding provide valuable insights and clear direction for future developments in LMMs, driving the evolution of truly capable multimodal AI co-scientists. We hope SciVideoBench could fit the interests of the community and help to push the boundary of cutting-edge AI for border science.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08555v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08555v1" target="_blank" rel="noopener noreferrer">
                VideoCanvas：通过上下文条件化从任意时空补丁实现统一视频补全
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Minghong Cai, Qiulin Wang, Zongli Ye, Wenze Liu, Quande Liu, Weicai Ye, Xintao W...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频补全技术，属于计算机视觉领域的特定应用。虽然提到了上下文条件化，但其核心是视频内容生成和补全，与推荐系统、搜索或广告的排名和匹配任务关联性较弱。没有明确的机制或应用表明该技术能直接应用于异构数据处理或推荐系统场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:58:59">2025-10-09 17:58:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08555v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08555v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce the task of arbitrary spatio-temporal video completion, where a video is generated from arbitrary, user-specified patches placed at any spatial location and timestamp, akin to painting on a video canvas. This flexible formulation naturally unifies many existing controllable video generation tasks--including first-frame image-to-video, inpainting, extension, and interpolation--under a single, cohesive paradigm. Realizing this vision, however, faces a fundamental obstacle in modern latent video diffusion models: the temporal ambiguity introduced by causal VAEs, where multiple pixel frames are compressed into a single latent representation, making precise frame-level conditioning structurally difficult. We address this challenge with VideoCanvas, a novel framework that adapts the In-Context Conditioning (ICC) paradigm to this fine-grained control task with zero new parameters. We propose a hybrid conditioning strategy that decouples spatial and temporal control: spatial placement is handled via zero-padding, while temporal alignment is achieved through Temporal RoPE Interpolation, which assigns each condition a continuous fractional position within the latent sequence. This resolves the VAE's temporal ambiguity and enables pixel-frame-aware control on a frozen backbone. To evaluate this new capability, we develop VideoCanvasBench, the first benchmark for arbitrary spatio-temporal video completion, covering both intra-scene fidelity and inter-scene creativity. Experiments demonstrate that VideoCanvas significantly outperforms existing conditioning paradigms, establishing a new state of the art in flexible and unified video generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08551v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08551v1" target="_blank" rel="noopener noreferrer">
                ARTDECO：基于结构化场景表示的实时高效高保真3D重建方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ARTDECO: Towards Efficient and High-Fidelity On-the-Fly 3D Reconstruction with Structured Scene Representation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Guanghao Li, Kerui Ren, Linning Xu, Zhewen Zheng, Changjian Jiang, Xin Gao, Bo D...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D重建技术，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术栈关联度极低。虽然结构化场景表示在概念上可能与多模态建模有微弱联系，但论文明确聚焦于3D重建应用，缺乏在推荐/搜索/广告领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:57:38">2025-10-09 17:57:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08551v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08551v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                On-the-fly 3D reconstruction from monocular image sequences is a long-standing challenge in computer vision, critical for applications such as real-to-sim, AR/VR, and robotics. Existing methods face a major tradeoff: per-scene optimization yields high fidelity but is computationally expensive, whereas feed-forward foundation models enable real-time inference but struggle with accuracy and robustness. In this work, we propose ARTDECO, a unified framework that combines the efficiency of feed-forward models with the reliability of SLAM-based pipelines. ARTDECO uses 3D foundation models for pose estimation and point prediction, coupled with a Gaussian decoder that transforms multi-scale features into structured 3D Gaussians. To sustain both fidelity and efficiency at scale, we design a hierarchical Gaussian representation with a LoD-aware rendering strategy, which improves rendering fidelity while reducing redundancy. Experiments on eight diverse indoor and outdoor benchmarks show that ARTDECO delivers interactive performance comparable to SLAM, robustness similar to feed-forward systems, and reconstruction quality close to per-scene optimization, providing a practical path toward on-the-fly digitization of real-world environments with both accurate geometry and high visual fidelity. Explore more demos on our project page: https://city-super.github.io/artdeco/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08547v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08547v1" target="_blank" rel="noopener noreferrer">
                R2RGEN：面向空间泛化操作的实到实三维数据生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            R2RGEN: Real-to-Real 3D Data Generation for Spatially Generalized Manipulation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiuwei Xu, Angyuan Ma, Hankun Li, Bingyao Yu, Zheng Zhu, Jie Zhou, Jiwen Lu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D数据生成和空间操作，属于计算机视觉和图形学领域，与推荐系统、搜索或广告的核心技术关联度极低。虽然3D数据在某些特定场景（如AR/VR广告）可能有潜在应用，但论文标题未表明与异构数据建模、Transformer架构或LLM技术有任何直接联系，因此相关性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:55:44">2025-10-09 17:55:44</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08547v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08547v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Towards the aim of generalized robotic manipulation, spatial generalization is the most fundamental capability that requires the policy to work robustly under different spatial distribution of objects, environment and agent itself. To achieve this, substantial human demonstrations need to be collected to cover different spatial configurations for training a generalized visuomotor policy via imitation learning. Prior works explore a promising direction that leverages data generation to acquire abundant spatially diverse data from minimal source demonstrations. However, most approaches face significant sim-to-real gap and are often limited to constrained settings, such as fixed-base scenarios and predefined camera viewpoints. In this paper, we propose a real-to-real 3D data generation framework (R2RGen) that directly augments the pointcloud observation-action pairs to generate real-world data. R2RGen is simulator- and rendering-free, thus being efficient and plug-and-play. Specifically, given a single source demonstration, we introduce an annotation mechanism for fine-grained parsing of scene and trajectory. A group-wise augmentation strategy is proposed to handle complex multi-object compositions and diverse task constraints. We further present camera-aware processing to align the distribution of generated data with real-world 3D sensor. Empirically, R2RGen substantially enhances data efficiency on extensive experiments and demonstrates strong potential for scaling and application on mobile manipulation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.08532v1. While the card carries .collapsed-level-1, its .paper-details body is hidden by the page's ".collapsed-level-1 .paper-details" rule. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08532v1" target="_blank" rel="noopener noreferrer">
                连续上下文：基于指令的图像编辑的连续强度控制
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English-language parts are tagged lang="en" because the document root is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Kontinuous Kontext: Continuous Strength Control for Instruction-based Image Editing
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rishubh Parihar, Or Patashnik, Daniil Ostashev, R. Venkatesh Babu, Daniel Cohen-...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像编辑的指令控制技术，属于计算机视觉和AIGC领域。虽然涉及指令控制机制，但其核心应用是图像内容生成和编辑，与推荐系统、搜索或广告的排序和建模任务没有直接关联。该技术可能间接启发多模态交互，但缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the data, so datetime is a local date-time. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:51:03">2025-10-09 17:51:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08532v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08532v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Instruction-based image editing offers a powerful and intuitive way to manipulate images through natural language. Yet, relying solely on text instructions limits fine-grained control over the extent of edits. We introduce Kontinuous Kontext, an instruction-driven editing model that provides a new dimension of control over edit strength, enabling users to adjust edits gradually from no change to a fully realized result in a smooth and continuous manner. Kontinuous Kontext extends a state-of-the-art image editing model to accept an additional input, a scalar edit strength which is then paired with the edit instruction, enabling explicit control over the extent of the edit. To inject this scalar information, we train a lightweight projector network that maps the input scalar and the edit instruction to coefficients in the model's modulation space. For training our model, we synthesize a diverse dataset of image-edit-instruction-strength quadruplets using existing generative models, followed by a filtering stage to ensure quality and consistency. Kontinuous Kontext provides a unified approach for fine-grained control over edit strength for instruction driven editing from subtle to strong across diverse operations such as stylization, attribute, material, background, and shape changes, without requiring attribute-specific training.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08530v1" target="_blank" rel="noopener noreferrer">
                X2Video：适配扩散模型用于多模态可控神经视频渲染
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            X2Video: Adapting Diffusion Models for Multimodal Controllable Neural Video Rendering
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhitong Huang, Mohan Zhang, Renhan Wang, Rui Tang, Hao Zhu, Jing Liao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频生成和渲染技术，属于计算机视觉领域。虽然提到了多模态控制，但缺乏与推荐系统、搜索或广告的直接关联。扩散模型在内容生成方面的应用与排名、检索等核心业务场景关联较弱。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:50:31">2025-10-09 17:50:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08530v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08530v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span><span class="category-tag">68U05</span><span class="category-tag">I.3.3; I.3.6</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present X2Video, the first diffusion model for rendering photorealistic videos guided by intrinsic channels including albedo, normal, roughness, metallicity, and irradiance, while supporting intuitive multi-modal controls with reference images and text prompts for both global and local regions. The intrinsic guidance allows accurate manipulation of color, material, geometry, and lighting, while reference images and text prompts provide intuitive adjustments in the absence of intrinsic information. To enable these functionalities, we extend the intrinsic-guided image generation model XRGB to video generation by employing a novel and efficient Hybrid Self-Attention, which ensures temporal consistency across video frames and also enhances fidelity to reference images. We further develop a Masked Cross-Attention to disentangle global and local text prompts, applying them effectively onto respective local and global regions. For generating long videos, our novel Recursive Sampling method incorporates progressive frame sampling, combining keyframe prediction and frame interpolation to maintain long-range temporal consistency while preventing error accumulation. To support the training of X2Video, we assembled a video dataset named InteriorVideo, featuring 1,154 rooms from 295 interior scenes, complete with reliable ground-truth intrinsic channel sequences and smooth camera trajectories. Both qualitative and quantitative evaluations demonstrate that X2Video can produce long, temporally consistent, and photorealistic videos guided by intrinsic conditions. Additionally, X2Video effectively accommodates multi-modal controls with reference images, global and local text prompts, and simultaneously supports editing on color, material, geometry, and lighting through parametric tuning. Project page: https://luckyhzt.github.io/x2video
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08512v1" target="_blank" rel="noopener noreferrer">
                我们是否已看遍所有场景？基于场景图感知的深度点云压缩
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Have We Scene It All? Scene Graph-Aware Deep Point Cloud Compression
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nikolaos Stathoulopoulos, Christoforos Kanellakis, George Nikolakopoulos
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注点云数据的压缩技术，属于计算机视觉和3D视觉领域。虽然提到了场景图概念，但这与推荐系统、搜索或广告中的异构数据处理没有直接关联。点云压缩技术在当前焦点领域缺乏明确的应用场景，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:45:09">2025-10-09 17:45:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08512v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08512v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Efficient transmission of 3D point cloud data is critical for advanced perception in centralized and decentralized multi-agent robotic systems, especially nowadays with the growing reliance on edge and cloud-based processing. However, the large and complex nature of point clouds creates challenges under bandwidth constraints and intermittent connectivity, often degrading system performance. We propose a deep compression framework based on semantic scene graphs. The method decomposes point clouds into semantically coherent patches and encodes them into compact latent representations with semantic-aware encoders conditioned by Feature-wise Linear Modulation (FiLM). A folding-based decoder, guided by latent features and graph node attributes, enables structurally accurate reconstruction. Experiments on the SemanticKITTI and nuScenes datasets show that the framework achieves state-of-the-art compression rates, reducing data size by up to 98% while preserving both structural and semantic fidelity. In addition, it supports downstream applications such as multi-robot pose graph optimization and map merging, achieving trajectory accuracy and map alignment comparable to those obtained with raw LiDAR scans.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08485v1" target="_blank" rel="noopener noreferrer">
                InstructX：基于多模态大语言模型引导的统一视觉编辑方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            InstructX: Towards Unified Visual Editing with MLLM Guidance
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chong Mou, Qichao Sun, Yanze Wu, Pengze Zhang, Xinghui Li, Fulong Ye, Songtao Zh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于多模态大语言模型在视觉编辑领域的应用，属于纯粹的视觉内容生成范畴。虽然涉及多模态技术，但其核心应用是视觉编辑而非推荐系统、搜索或广告中的排序任务，与当前关注的技术方向关联度极低。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:26:09">2025-10-09 17:26:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08485v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08485v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                With recent advances in Multimodal Large Language Models (MLLMs) showing strong visual understanding and reasoning, interest is growing in using them to improve the editing performance of diffusion models. Despite rapid progress, most studies lack an in-depth analysis of MLLM design choices. Moreover, the integration of MLLMs and diffusion models remains an open challenge in some difficult tasks, such as video editing. In this paper, we present InstructX, a unified framework for image and video editing. Specifically, we conduct a comprehensive study on integrating MLLMs and diffusion models for instruction-driven editing across diverse tasks. Building on this study, we analyze the cooperation and distinction between images and videos in unified modeling. (1) We show that training on image data can lead to emergent video editing capabilities without explicit supervision, thereby alleviating the constraints imposed by scarce video training data. (2) By incorporating modality-specific MLLM features, our approach effectively unifies image and video editing tasks within a single model. Extensive experiments demonstrate that our method can handle a broad range of image and video editing tasks and achieves state-of-the-art performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08480v1" target="_blank" rel="noopener noreferrer">
                Video-STAR：通过工具增强开放词汇动作识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Video-STAR: Reinforcing Open-Vocabulary Action Recognition with Tools
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhenlong Yuan, Xiangyan Qu, Chengxuan Qian, Rui Chen, Jing Tang, Lei Sun, Xiangx...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的开放词汇动作识别，虽然涉及多模态学习概念，但与推荐系统、搜索或广告的核心技术关联较弱。其工具增强方法可能对处理视频内容有一定启发，但在当前焦点领域的直接应用潜力有限。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:20:44">2025-10-09 17:20:44</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08480v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08480v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multimodal large language models (MLLMs) have demonstrated remarkable potential in bridging visual and textual reasoning, yet their reliance on text-centric priors often limits their ability to disentangle semantically similar actions in open-vocabulary scenarios. To address this, we propose Video-STAR, a framework that harmonizes contextual sub-motion decomposition with tool-augmented reinforcement learning for open-vocabulary action recognition (OVAR). Unlike prior methods that treat actions as monolithic entities, our approach innovatively decomposes actions into discriminative sub-motions for fine-grained matching while dynamically invoking domain-specific tools for cross-modal interleaving, thereby enabling category-specific reasoning capacity and reducing cross-modal hallucination. Moreover, by designing a hierarchical reward that balances tool-usage efficiency, sub-motion relevance, and structural coherence in reasoning, our method autonomously leverages external tools to prioritize sub-motion patterns without explicit supervision, transmitting from text-centric reasoning to visually grounded inference. Extensive evaluations on HMDB-51, UCF-101, SSv2, Kinetics-400, and Kinetics-600 datasets demonstrate our state-of-the-art performance, outperforming existing methods in distinguishing fine-grained actions and handling cross-modal hallucination, validating our excellent robustness and generalization.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08449v1" target="_blank" rel="noopener noreferrer">
                用于高分辨率图像量化和特征提取的层次化空间算法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Hierarchical Spatial Algorithms for High-Resolution Image Quantization and Feature Extraction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Noor Islam S. Mohammad
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像量化和特征提取技术，虽然特征提取在广义上与推荐系统相关，但论文明确聚焦于高分辨率图像处理，这属于纯粹的视觉技术范畴。根据筛选标准，纯粹的视觉论文如果没有明确的推荐/搜索/广告应用相关性，应被视为低相关性。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:56:24">2025-10-09 16:56:24</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08449v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08449v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">68T45</span><span class="category-tag">68U10</span><span class="category-tag">I.4.8; I.2.10</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This study introduces a modular framework for spatial image processing, integrating grayscale quantization, color and brightness enhancement, image sharpening, bidirectional transformation pipelines, and geometric feature extraction. A stepwise intensity transformation quantizes grayscale images into eight discrete levels, producing a posterization effect that simplifies representation while preserving structural detail. Color enhancement is achieved via histogram equalization in both RGB and YCrCb color spaces, with the latter improving contrast while maintaining chrominance fidelity. Brightness adjustment is implemented through HSV value-channel manipulation, and image sharpening is performed using a 3 * 3 convolution kernel to enhance high-frequency details. A bidirectional transformation pipeline that integrates unsharp masking, gamma correction, and noise amplification achieved accuracy levels of 76.10% and 74.80% for the forward and reverse processes, respectively. Geometric feature extraction employed Canny edge detection, Hough-based line estimation (e.g., 51.50° for billiard cue alignment), Harris corner detection, and morphological window localization. Cue isolation further yielded 81.87% similarity against ground truth images. Experimental evaluation across diverse datasets demonstrates robust and deterministic performance, highlighting its potential for real-time image analysis and computer vision.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08442v1" target="_blank" rel="noopener noreferrer">
                凝视目标：利用回报引导的对比学习塑造视觉注意力
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Gaze on the Prize: Shaping Visual Attention with Return-Guided Contrastive Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Andrew Lee, Ian Chuang, Dechen Gao, Kai Fukazawa, Iman Soltani
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉注意力机制和对比学习，虽然对比学习是LLM相关技术，但论文明确聚焦于视觉领域（'Visual Attention', 'Gaze'），缺乏与推荐系统、搜索或广告的明确关联。视觉注意力机制在推荐/搜索中主要用于处理图像内容，但论文标题未表明这种跨领域应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:54:11">2025-10-09 16:54:11</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08442v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08442v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Visual Reinforcement Learning (RL) agents must learn to act based on high-dimensional image data where only a small fraction of the pixels is task-relevant. This forces agents to waste exploration and computational resources on irrelevant features, leading to sample-inefficient and unstable learning. To address this, inspired by human visual foveation, we introduce Gaze on the Prize. This framework augments visual RL with a learnable foveal attention mechanism (Gaze), guided by a self-supervised signal derived from the agent's experience pursuing higher returns (the Prize). Our key insight is that return differences reveal what matters most: If two similar representations produce different outcomes, their distinguishing features are likely task-relevant, and the gaze should focus on them accordingly. This is realized through return-guided contrastive learning that trains the attention to distinguish between the features relevant to success and failure. We group similar visual representations into positives and negatives based on their return differences and use the resulting labels to construct contrastive triplets. These triplets provide the training signal that teaches the attention mechanism to produce distinguishable representations for states associated with different outcomes. Our method achieves up to 2.4x improvement in sample efficiency and can solve tasks that the baseline fails to learn, demonstrated across a suite of manipulation tasks from the ManiSkill3 benchmark, all without modifying the underlying algorithm or hyperparameters.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08425v1" target="_blank" rel="noopener noreferrer">
                通过直接群体偏好优化强化扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Reinforcing Diffusion Models by Direct Group Preference Optimization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yihong Luo, Tianyang Hu, Jing Tang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注扩散模型的偏好优化，属于生成模型领域，与我的核心关注点（推荐系统、搜索、广告的直接应用或使能技术）相关性较低。虽然偏好优化技术可能间接影响内容生成质量，但论文没有明确展示在推荐、搜索或广告中的直接应用潜力，且更偏向AIGC和内容生成领域。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:40:43">2025-10-09 16:40:43</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08425v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08425v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                While reinforcement learning methods such as Group Relative Preference Optimization (GRPO) have significantly enhanced Large Language Models, adapting them to diffusion models remains challenging. In particular, GRPO demands a stochastic policy, yet the most cost-effective diffusion samplers are based on deterministic ODEs. Recent work addresses this issue by using inefficient SDE-based samplers to induce stochasticity, but this reliance on model-agnostic Gaussian noise leads to slow convergence. To resolve this conflict, we propose Direct Group Preference Optimization (DGPO), a new online RL algorithm that dispenses with the policy-gradient framework entirely. DGPO learns directly from group-level preferences, which utilize relative information of samples within groups. This design eliminates the need for inefficient stochastic policies, unlocking the use of efficient deterministic ODE samplers and faster training. Extensive results show that DGPO trains around 20 times faster than existing state-of-the-art methods and achieves superior performance on both in-domain and out-of-domain reward metrics. Code is available at https://github.com/Luo-Yihong/DGPO.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to the paper's alphaXiv page, followed by the score badge.
         The star icon is decorative, so it is hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08398v1" target="_blank" rel="noopener noreferrer">
                VideoVerse：您的文本到视频生成器距离世界模型还有多远？
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>
    
    <!-- Paper details: original English title, authors, recommendation rationale, metadata row and abstract.
         Hidden while the card carries `collapsed-level-1` (see the page's inline styles).
         English passages are tagged lang="en" because the document language is zh-CN. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            VideoVerse: How Far is Your T2V Generator from a World Model?
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zeqing Wang, Xinyu Wei, Bairui Li, Zhen Guo, Jinrui Zhang, Hongyang Wei, Keze Wa...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注文本到视频生成技术及其与世界模型的比较，这属于纯粹的视觉内容生成领域。虽然世界模型概念在强化学习中很重要，但论文标题明确聚焦于视频生成评估，与推荐系统、搜索或广告中的排名、用户建模等核心任务没有直接关联。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | category tags. Icons and "|" separators are purely visual.
             The page states no time zone, so the datetime value carries no offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:18:20">2025-10-09 16:18:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08398v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08398v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The recent rapid advancement of Text-to-Video (T2V) generation technologies, which are critical to build “world models”, makes the existing benchmarks increasingly insufficient to evaluate state-of-the-art T2V models. First, current evaluation dimensions, such as per-frame aesthetic quality and temporal consistency, are no longer able to differentiate state-of-the-art T2V models. Second, event-level temporal causality, which not only distinguishes video from other modalities but also constitutes a crucial component of world models, is severely underexplored in existing benchmarks. Third, existing benchmarks lack a systematic assessment of world knowledge, which are essential capabilities for building world models. To address these issues, we introduce VideoVerse, a comprehensive benchmark that focuses on evaluating whether a T2V model could understand complex temporal causality and world knowledge in the real world. We collect representative videos across diverse domains (e.g., natural landscapes, sports, indoor scenes, science fiction, chemical and physical experiments) and extract their event-level descriptions with inherent temporal causality, which are then rewritten into text-to-video prompts by independent annotators. For each prompt, we design a suite of binary evaluation questions from the perspective of dynamic and static properties, with a total of ten carefully defined evaluation dimensions. In total, our VideoVerse comprises 300 carefully curated prompts, involving 815 events and 793 binary evaluation questions. Consequently, a human preference aligned QA-based evaluation pipeline is developed by using modern vision-language models. Finally, we perform a systematic evaluation of state-of-the-art open-source and closed-source T2V models on VideoVerse, providing in-depth analysis on how far the current T2V generators are from world models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08394v1" target="_blank" rel="noopener noreferrer">
                神经场的光谱预滤波
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Spectral Prefiltering of Neural Fields
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mustafa B. Yaldiz, Ishit Mehta, Nithin Raghavan, Andreas Meuleman, Tzu-Mao Li, R...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注神经场（Neural Fields）的光谱预滤波技术，这属于计算机视觉和图形学领域的底层技术。虽然神经场在3D重建和表示方面有应用，但该技术本身与推荐系统、搜索或广告的核心进展缺乏直接关联，也没有明确的Transformer架构改进或LLM应用潜力。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:15:46">2025-10-09 16:15:46</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08394v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08394v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Neural fields excel at representing continuous visual signals but typically operate at a single, fixed resolution. We present a simple yet powerful method to optimize neural fields that can be prefiltered in a single forward pass. Key innovations and features include: (1) We perform convolutional filtering in the input domain by analytically scaling Fourier feature embeddings with the filter's frequency response. (2) This closed-form modulation generalizes beyond Gaussian filtering and supports other parametric filters (Box and Lanczos) that are unseen at training time. (3) We train the neural field using single-sample Monte Carlo estimates of the filtered signal. Our method is fast during both training and inference, and imposes no additional constraints on the network architecture. We show quantitative and qualitative improvements over existing methods for neural-field filtering.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08377v1" target="_blank" rel="noopener noreferrer">
                UniVideo：面向视频的统一理解、生成与编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniVideo: Unified Understanding, Generation, and Editing for Videos
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cong Wei, Quande Liu, Zixuan Ye, Qiulin Wang, Xintao Wang, Pengfei Wan, Kun Gai,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频领域的统一建模，虽然涉及多模态技术，但其核心应用场景是视频内容的理解、生成和编辑，与推荐系统、搜索或广告的关联性较弱。视频生成和编辑技术可能间接应用于广告创意生成，但这属于明确排除的非相关主题范畴。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T16:01:30">2025-10-09 16:01:30</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08377v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08377v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Unified multimodal models have shown promising results in multimodal content generation and editing but remain largely limited to the image domain. In this work, we present UniVideo, a versatile framework that extends unified modeling to the video domain. UniVideo adopts a dual-stream design, combining a Multimodal Large Language Model (MLLM) for instruction understanding with a Multimodal DiT (MMDiT) for video generation. This design enables accurate interpretation of complex multimodal instructions while preserving visual consistency. Built on this architecture, UniVideo unifies diverse video generation and editing tasks under a single multimodal instruction paradigm and is jointly trained across them. Extensive experiments demonstrate that UniVideo matches or surpasses state-of-the-art task-specific baselines in text/image-to-video generation, in-context video generation and in-context video editing. Notably, the unified design of UniVideo enables two forms of generalization. First, UniVideo supports task composition, such as combining editing with style transfer, by integrating multiple capabilities within a single instruction. Second, even without explicit training on free-form video editing, UniVideo transfers its editing capability from large-scale image editing data to this setting, handling unseen instructions such as green-screening characters or changing materials within a video. Beyond these core capabilities, UniVideo also supports visual-prompt-based video generation, where the MLLM interprets visual prompts and guides the MMDiT during synthesis. To foster future research, we will release our model and code.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08363v1" target="_blank" rel="noopener noreferrer">
                基于Transformer的扩散模型进行高光谱数据增强
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Hyperspectral data augmentation with transformer-based diffusion models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mattia Ferrari, Lorenzo Bruzzone
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注高光谱数据增强，这属于计算机视觉领域的特定应用，与推荐系统、搜索或广告的核心技术关联度极低。虽然使用了Transformer架构，但应用场景（高光谱数据）在推荐/搜索/广告领域几乎没有实际应用场景，因此整体相关性非常有限。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T15:50:29">2025-10-09 15:50:29</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08363v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08363v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The introduction of new generation hyperspectral satellite sensors, combined with advancements in deep learning methodologies, has significantly enhanced the ability to discriminate detailed land-cover classes at medium-large scales. However, a significant challenge in deep learning methods is the risk of overfitting when training networks with small labeled datasets. In this work, we propose a data augmentation technique that leverages a guided diffusion model. To effectively train the model with a limited number of labeled samples and to capture complex patterns in the data, we implement a lightweight transformer network. Additionally, we introduce a modified weighted loss function and an optimized cosine variance scheduler, which facilitate fast and effective training on small datasets. We evaluate the effectiveness of the proposed method on a forest classification task with 10 different forest types using hyperspectral images acquired by the PRISMA satellite. The results demonstrate that the proposed method outperforms other data augmentation techniques in both average and weighted average accuracy. The effectiveness of the method is further highlighted by the stable training behavior of the model, which addresses a common limitation in the practical application of deep generative models for data augmentation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08352v1" target="_blank" rel="noopener noreferrer">
                评估小型视觉语言模型在距离相关交通感知任务上的性能
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Evaluating Small Vision-Language Models on Distance-Dependent Traffic Perception
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nikos Theodoridis, Tim Brophy, Reenu Mohandas, Ganesh Sistu, Fiachra Collins, An...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及视觉语言模型(VLM)，但其焦点是交通感知这一特定领域应用，与推荐系统、搜索或广告的核心技术缺乏直接关联。该研究主要针对计算机视觉任务，没有展示在异构数据处理或推荐系统应用方面的潜力。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T15:38:41">2025-10-09 15:38:41</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08352v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08352v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language Models (VLMs) are becoming increasingly powerful, demonstrating strong performance on a variety of tasks that require both visual and textual understanding. Their strong generalisation abilities make them a promising component for automated driving systems, which must handle unexpected corner cases. However, to be trusted in such safety-critical applications, a model must first possess a reliable perception system. Moreover, since critical objects and agents in traffic scenes are often at a distance, we require systems that are not "shortsighted", i.e., systems with strong perception capabilities at both close (up to 20 meters) and long (30+ meters) range. With this in mind, we introduce Distance-Annotated Traffic Perception Question Answering (DTPQA), the first Visual Question Answering (VQA) benchmark focused solely on perception-based questions in traffic scenes, enriched with distance annotations. By excluding questions that require reasoning, we ensure that model performance reflects perception capabilities alone. Since automated driving hardware has limited processing power and cannot support large VLMs, our study centers on smaller VLMs. More specifically, we evaluate several state-of-the-art (SOTA) small VLMs on DTPQA and show that, despite the simplicity of the questions, these models significantly underperform compared to humans (~60% average accuracy for the best-performing small VLM versus ~85% human performance). However, it is important to note that the human sample size was relatively small, which imposes statistical limitations. We also identify specific perception tasks, such as distinguishing left from right, that remain particularly challenging for these models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08316v1" target="_blank" rel="noopener noreferrer">
                利用2D语义知识解锁3D功能可供性分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yu Huang, Zelin Peng, Changsong Wen, Xiaokang Yang, Wei Shen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D视觉中的功能可供性分割，属于纯粹的计算机视觉研究领域。虽然提到了语义知识，但主要应用于3D场景理解，与推荐系统、搜索或广告的核心技术栈没有明确的直接关联或潜在应用价值。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T15:01:26">2025-10-09 15:01:26</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08316v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08316v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Affordance segmentation aims to parse 3D objects into functionally distinct parts, bridging recognition and interaction for applications in robotic manipulation, embodied AI, and AR. While recent studies leverage visual or textual prompts to guide this process, they often rely on point cloud encoders as generic feature extractors, overlooking the intrinsic challenges of 3D data such as sparsity, noise, and geometric ambiguity. As a result, 3D features learned in isolation frequently lack clear and semantically consistent functional boundaries. To address this bottleneck, we propose a semantic-grounded learning paradigm that transfers rich semantic knowledge from large-scale 2D Vision Foundation Models (VFMs) into the 3D domain. Specifically, We introduce Cross-Modal Affinity Transfer (CMAT), a pre-training strategy that aligns a 3D encoder with lifted 2D semantics and jointly optimizes reconstruction, affinity, and diversity to yield semantically organized representations. Building on this backbone, we further design the Cross-modal Affordance Segmentation Transformer (CAST), which integrates multi-modal prompts with CMAT-pretrained features to generate precise, prompt-aware segmentation maps. Extensive experiments on standard benchmarks demonstrate that our framework establishes new state-of-the-art results for 3D affordance segmentation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08305v1" target="_blank" rel="noopener noreferrer">
                LTCA：用于参考视频对象分割的长程时序上下文注意力机制
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            LTCA: Long-range Temporal Context Attention for Referring Video Object Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Cilin Yan, Jingyun Wang, Guoliang Kang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的视频对象分割任务，虽然涉及注意力机制，但其核心应用场景（视频对象分割）与推荐系统、搜索或广告没有直接关联。长程时序建模技术理论上可能对处理用户行为序列有启发，但这种跨领域应用过于间接且不明确。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:55:52">2025-10-09 14:55:52</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08305v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08305v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Referring Video Segmentation (RVOS) aims to segment objects in videos given linguistic expressions. The key to solving RVOS is to extract long-range temporal context information from the interactions of expressions and videos to depict the dynamic attributes of each object. Previous works either adopt attention across all the frames or stack dense local attention to achieve a global view of temporal context. However, they fail to strike a good balance between locality and globality, and the computation complexity significantly increases with the increase of video length. In this paper, we propose an effective long-range temporal context attention (LTCA) mechanism to aggregate global context information into object features. Specifically, we aggregate the global context information from two aspects. Firstly, we stack sparse local attentions to balance the locality and globality. We design a dilated window attention across frames to aggregate local context information and perform such attention in a stack of layers to enable a global view. Further, we enable each query to attend to a small group of keys randomly selected from a global pool to enhance the globality. Secondly, we design a global query to interact with all the other queries to directly encode the global context information. Experiments show our method achieves new state-of-the-art on four referring video segmentation benchmarks. Notably, our method shows an improvement of 11.3% and 8.3% on the MeViS valu and val datasets respectively.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08279v1" target="_blank" rel="noopener noreferrer">
                学习神经曝光场用于视图合成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Learning Neural Exposure Fields for View Synthesis
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Michael Niemeyer, Fabian Manhardt, Marie-Julie Rakotosaona, Michael Oechsle, Chr...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文主要关注计算机视觉中的视图合成和神经渲染技术，属于纯粹的视觉领域研究。虽然神经场技术在3D表示方面有创新，但没有明确展示在推荐系统、搜索或广告领域的潜在应用，与当前关注的核心领域距离较远。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:32:41">2025-10-09 14:32:41</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08279v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08279v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent advances in neural scene representations have led to unprecedented quality in 3D reconstruction and view synthesis. Despite achieving high-quality results for common benchmarks with curated data, outputs often degrade for data that contain per image variations such as strong exposure changes, present, e.g., in most scenes with indoor and outdoor areas or rooms with windows. In this paper, we introduce Neural Exposure Fields (NExF), a novel technique for robustly reconstructing 3D scenes with high quality and 3D-consistent appearance from challenging real-world captures. In the core, we propose to learn a neural field predicting an optimal exposure value per 3D point, enabling us to optimize exposure along with the neural scene representation. While capture devices such as cameras select optimal exposure per image/pixel, we generalize this concept and perform optimization in 3D instead. This enables accurate view synthesis in high dynamic range scenarios, bypassing the need of post-processing steps or multi-exposure captures. Our contributions include a novel neural representation for exposure prediction, a system for joint optimization of the scene representation and the exposure field via a novel neural conditioning mechanism, and demonstrated superior performance on challenging real-world data. We find that our approach trains faster than prior works and produces state-of-the-art results on several benchmarks improving by over 55% over best-performing baselines.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card: Chinese title link and score badge, followed by `.paper-details`
     (original English title, authors, recommendation reason, arXiv metadata, abstract).
     The page's inline style hides `.paper-details` while the card has `collapsed-level-1`.
     Icons are decorative (aria-hidden); English passages carry lang="en" on this zh-CN page. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08278v1" target="_blank" rel="noopener noreferrer">
                一种用于具身参考理解的多模态深度感知方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Multimodal Depth-Aware Method For Embodied Reference Understanding
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fevziye Irem Eyiokur, Dogucan Yaman, Hazım Kemal Ekenel, Alexander Waibel
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要涉及具身AI和深度感知，属于计算机视觉和机器人领域。虽然提到了多模态，但核心焦点是具身参考理解（机器人导航和交互），与推荐系统、搜索或广告的直接相关性较弱。其技术可能间接应用于需要空间理解的场景，但应用潜力有限且不明确。</p>
        </div>

        <!-- Metadata row: submission timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T14:32:21">2025-10-09 14:32:21</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08278v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08278v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.HC</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Embodied Reference Understanding requires identifying a target object in a visual scene based on both language instructions and pointing cues. While prior works have shown progress in open-vocabulary object detection, they often fail in ambiguous scenarios where multiple candidate objects exist in the scene. To address these challenges, we propose a novel ERU framework that jointly leverages LLM-based data augmentation, depth-map modality, and a depth-aware decision module. This design enables robust integration of linguistic and embodied cues, improving disambiguation in complex or cluttered environments. Experimental results on two datasets demonstrate that our approach significantly outperforms existing baselines, achieving more accurate and reliable referent detection.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
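 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE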
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08143v1" target="_blank" rel="noopener noreferrer">
                UniMMVSR：一种用于级联视频超分辨率的统一多模态框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniMMVSR: A Unified Multi-Modal Framework for Cascaded Video Super-Resolution
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shian Du, Menghan Xia, Chang Liu, Quande Liu, Xintao Wang, Pengfei Wan, Xiangyan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频超分辨率这一计算机视觉任务，虽然涉及多模态框架，但其核心应用场景是视频质量增强而非推荐、搜索或广告系统。视频超分辨率技术对RecSys/Search/Ads的潜在应用非常有限，可能仅在某些需要高质量视频预览的电商场景中有微弱关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 12:25:16">2025-10-09 12:25:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08143v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08143v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Cascaded video super-resolution has emerged as a promising technique for decoupling the computational burden associated with generating high-resolution videos using large foundation models. Existing studies, however, are largely confined to text-to-video tasks and fail to leverage additional generative conditions beyond text, which are crucial for ensuring fidelity in multi-modal video generation. We address this limitation by presenting UniMMVSR, the first unified generative video super-resolution framework to incorporate hybrid-modal conditions, including text, images, and videos. We conduct a comprehensive exploration of condition injection strategies, training schemes, and data mixture techniques within a latent video diffusion model. A key challenge was designing distinct data construction and condition utilization methods to enable the model to precisely utilize all condition types, given their varied correlations with the target video. Our experiments demonstrate that UniMMVSR significantly outperforms existing methods, producing videos with superior detail and a higher degree of conformity to multi-modal conditions. We also validate the feasibility of combining UniMMVSR with a base model to achieve multi-modal guided generation of 4K video, a feat previously unattainable with existing techniques.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
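 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE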
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08138v1" target="_blank" rel="noopener noreferrer">
                通过注意力增强改进视频语言模型中的时序理解逻辑一致性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Improving Temporal Understanding Logic Consistency in Video-Language Models via Attention Enhancement
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chengzhi Li, Heyan Huang, Ping Jian, Zhen Yang, Yaning Tian
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然这篇论文涉及多模态建模和注意力机制，但其核心焦点是视频-语言模型中的时序理解，这与推荐系统、搜索或广告的直接应用相关性较弱。注意力增强技术可能有潜在的效率改进，但论文主要针对视频时序逻辑而非推荐/搜索场景中的异构数据处理。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 12:22:06">2025-10-09 12:22:06</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08138v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08138v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) often generate self-contradictory outputs, which severely impacts their reliability and hinders their adoption in practical applications. In video-language models (Video-LLMs), this phenomenon recently draws the attention of researchers. Specifically, these models fail to provide logically consistent responses to rephrased questions based on their grounding outputs. However, the underlying causes of this phenomenon remain underexplored. In this work, we adopt an interpretability-driven approach to analyze, statistically summarize, and intervention the potential factors of the phenomenon. We find that one of the primary reasons for the inconsistency in responses lies in the inability of cross-modal attention heads to effectively distinguish video tokens across different timestamps. To address this, we propose an attention enhancement method called Temporally Conditioned Attention Sharpening (TCAS), which constructs an enhancement objective based on attention distinctions to enhance the model's temporal resolution capability, thereby improving its temporal understanding logic consistency. Experimental results demonstrate that our method significantly enhances the temporal logic consistency of Video-LLMs. Further interpretability analyses reveal that our method indeed improves the temporal discriminability of attention heads, validating our conclusions. Additionally, our method achieves performance improvements in general video temporal grounding tasks, highlighting that temporal logic consistency is a bottleneck in temporal understanding. By enhancing consistency, our method drives significant progress in video temporal understanding.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
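 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE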
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08017v1" target="_blank" rel="noopener noreferrer">
                RayFusion：射线融合增强的协同视觉感知
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RayFusion: Ray Fusion Enhanced Collaborative Visual Perception
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shaohong Wang, Bin Lu, Xinyu Xiao, Hanzhi Zhong, Bowen Pang, Tong Wang, Zhiyu Xi...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的协同感知技术，虽然涉及多模态融合概念，但主要应用于自动驾驶或机器人视觉等场景。在推荐系统、搜索或广告领域，这种射线融合的视觉感知技术缺乏明确的直接应用场景，与异构数据统一建模的关联性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 09:54:08">2025-10-09 09:54:08</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08017v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08017v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Collaborative visual perception methods have gained widespread attention in the autonomous driving community in recent years due to their ability to address sensor limitation problems. However, the absence of explicit depth information often makes it difficult for camera-based perception systems, e.g., 3D object detection, to generate accurate predictions. To alleviate the ambiguity in depth estimation, we propose RayFusion, a ray-based fusion method for collaborative visual perception. Using ray occupancy information from collaborators, RayFusion reduces redundancy and false positive predictions along camera rays, enhancing the detection performance of purely camera-based collaborative perception systems. Comprehensive experiments show that our method consistently outperforms existing state-of-the-art models, substantially advancing the performance of collaborative visual perception. The code is available at https://github.com/wangsh0111/RayFusion.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
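 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE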
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07984v1" target="_blank" rel="noopener noreferrer">
                架构复杂性总是答案吗？SwinIR与高效CNN的案例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Is Architectural Complexity Always the Answer? A Case Study on SwinIR vs. an Efficient CNN
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chandresh Sutariya, Nitin Singh
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要比较SwinIR（基于Transformer的架构）与高效CNN在图像恢复任务中的表现，属于计算机视觉领域的架构效率研究。虽然涉及Transformer架构，但论文聚焦于图像恢复这一特定视觉任务，没有明确展示在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 09:16:05">2025-10-09 09:16:05</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07984v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07984v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The simultaneous restoration of high-frequency details and suppression of severe noise in low-light imagery presents a significant and persistent challenge in computer vision. While large-scale Transformer models like SwinIR have set the state of the art in performance, their high computational cost can be a barrier for practical applications. This paper investigates the critical trade-off between performance and efficiency by comparing the state-of-the-art SwinIR model against a standard, lightweight Convolutional Neural Network (CNN) on this challenging task. Our experimental results reveal a nuanced but important finding. While the Transformer-based SwinIR model achieves a higher peak performance, with a Peak Signal-to-Noise Ratio (PSNR) of 39.03 dB, the lightweight CNN delivers a surprisingly competitive PSNR of 37.4 dB. Crucially, the CNN reached this performance after converging in only 10 epochs of training, whereas the more complex SwinIR model required 132 epochs. This efficiency is further underscored by the model's size; the CNN is over 55 times smaller than SwinIR. This work demonstrates that a standard CNN can provide a near state-of-the-art result with significantly lower computational overhead, presenting a compelling case for its use in real-world scenarios where resource constraints are a primary concern.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
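 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE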
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07944v1" target="_blank" rel="noopener noreferrer">
                CVD-STORM：面向自动驾驶的具有时空重建模型的跨视角视频扩散
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CVD-STORM: Cross-View Video Diffusion with Spatial-Temporal Reconstruction Model for Autonomous Driving
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianrui Zhang, Yichen Liu, Zilin Guo, Yuxin Guo, Jingcheng Ni, Chenjing Ding, Da...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注自动驾驶领域的视频生成和时空建模，属于计算机视觉应用范畴。虽然视频扩散模型在技术上有一定先进性，但论文标题明确指向自动驾驶这一特定领域应用，与推荐系统、搜索或广告的核心技术焦点缺乏直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 08:41:58">2025-10-09 08:41:58</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07944v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07944v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Generative models have been widely applied to world modeling for environment simulation and future state prediction. With advancements in autonomous driving, there is a growing demand not only for high-fidelity video generation under various controls, but also for producing diverse and meaningful information such as depth estimation. To address this, we propose CVD-STORM, a cross-view video diffusion model utilizing a spatial-temporal reconstruction Variational Autoencoder (VAE) that generates long-term, multi-view videos with 4D reconstruction capabilities under various control inputs. Our approach first fine-tunes the VAE with an auxiliary 4D reconstruction task, enhancing its ability to encode 3D structures and temporal dynamics. Subsequently, we integrate this VAE into the video diffusion process to significantly improve generation quality. Experimental results demonstrate that our model achieves substantial improvements in both FID and FVD metrics. Additionally, the jointly-trained Gaussian Splatting Decoder effectively reconstructs dynamic scenes, providing valuable geometric information for comprehensive scene understanding.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
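 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE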
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07915v1" target="_blank" rel="noopener noreferrer">
                MARC：用于高效视频理解的内存增强强化学习令牌压缩
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MARC: Memory-Augmented RL Token Compression for Efficient Video Understanding
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Peiran Wu, Zhuorui Yu, Yunze Liu, Chi-Hao Wu, Enmin Zhou, Junxiao Shen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频理解的效率优化，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然压缩技术理论上可能应用于多模态推荐中的视频内容处理，但论文的强化学习和视频理解焦点使其与当前关注点的直接相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 08:07:19">2025-10-09 08:07:19</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07915v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07915v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. The source abstract used LaTeX emphasis commands and escaped percent signs; they are rendered here as strong/em elements and plain "%". -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid progress of large language models (LLMs) has laid the foundation for multimodal models. However, visual language models (VLMs) still face heavy computational costs when extended from images to videos due to high frame rates and long durations. Token compression is a promising solution, yet most existing training-free methods cause information loss and performance degradation. To overcome this, we propose <strong>Memory-Augmented Reinforcement Learning-based Token Compression (MARC)</strong>, which integrates structured retrieval and RL-based distillation. MARC adopts a <em>retrieve-then-compress</em> strategy using a <strong>Visual Memory Retriever (VMR)</strong> to select key clips and a <strong>Compression Group Relative Policy Optimization (C-GRPO)</strong> framework to distil reasoning ability from a teacher to a student model. Experiments on six video benchmarks show that MARC achieves near-baseline accuracy using only one frame's tokens -- reducing visual tokens by <strong>95%</strong>, GPU memory by <strong>72%</strong>, and latency by <strong>23.9%</strong>. This demonstrates its potential for efficient, real-time video understanding in resource-constrained settings such as video QA, surveillance, and autonomous driving.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
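 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE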
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07839v1" target="_blank" rel="noopener noreferrer">
                AlignGS：对齐几何与语义以实现稀疏视图下的鲁棒室内重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AlignGS: Aligning Geometry and Semantics for Robust Indoor Reconstruction from Sparse Views
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yijie Gao, Houqiang Zhong, Tianchi Zhu, Zhengxue Cheng, Qiang Hu, Li Song
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉中的3D室内重建问题，属于纯粹的视觉领域研究。虽然提到了语义对齐，但其核心是几何重建技术，与推荐系统、搜索或广告的排名和建模需求没有直接关联。该技术缺乏在RecSys/Search/Ads领域的明显应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 06:30:20">2025-10-09 06:30:20</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07839v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07839v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (quoted verbatim from the source). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The demand for semantically rich 3D models of indoor scenes is rapidly growing, driven by applications in augmented reality, virtual reality, and robotics. However, creating them from sparse views remains a challenge due to geometric ambiguity. Existing methods often treat semantics as a passive feature painted on an already-formed, and potentially flawed, geometry. We posit that for robust sparse-view reconstruction, semantic understanding instead be an active, guiding force. This paper introduces AlignGS, a novel framework that actualizes this vision by pioneering a synergistic, end-to-end optimization of geometry and semantics. Our method distills rich priors from 2D foundation models and uses them to directly regularize the 3D representation through a set of novel semantic-to-geometry guidance mechanisms, including depth consistency and multi-faceted normal regularization. Extensive evaluations on standard benchmarks demonstrate that our approach achieves state-of-the-art results in novel view synthesis and produces reconstructions with superior geometric accuracy. The results validate that leveraging semantic priors as a geometric regularizer leads to more coherent and complete 3D models from limited input views. Our code is avaliable at https://github.com/MediaX-SJTU/AlignGS .
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
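 * @Description: Paper card template (normal_paper_template.html): title link, score badge, collapsible details, metadata row and abstract. Header fields are koroFileHeader defaults; set `customMade` to configure them: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE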
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: Chinese title linking to alphaXiv, with the score badge on the right. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07828v1" target="_blank" rel="noopener noreferrer">
                MMHOI：建模复杂的三维多人与多物体交互
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail body; the page stylesheet sets display: none on .paper-details inside .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kaen Kogashi, Anoop Cherian, Meng-Yu Jennifer Kuo
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D视觉中复杂交互的建模，属于纯粹的计算机视觉领域。虽然建模复杂交互的技术在概念上可能与推荐系统中的用户-物品交互有类比，但论文标题明确限定在3D视觉场景，没有显示出与推荐系统、搜索或广告的直接关联或潜在应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09 06:18:12">2025-10-09 06:18:12</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07828v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07828v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Real-world scenes often feature multiple humans interacting with multiple objects in ways that are causal, goal-oriented, or cooperative. Yet existing 3D human-object interaction (HOI) benchmarks consider only a fraction of these complex interactions. To close this gap, we present MMHOI -- a large-scale, Multi-human Multi-object Interaction dataset consisting of images from 12 everyday scenarios. MMHOI offers complete 3D shape and pose annotations for every person and object, along with labels for 78 action categories and 14 interaction-specific body parts, providing a comprehensive testbed for next-generation HOI research. Building on MMHOI, we present MMHOI-Net, an end-to-end transformer-based neural network for jointly estimating human-object 3D geometries, their interactions, and associated actions. A key innovation in our framework is a structured dual-patch representation for modeling objects and their interactions, combined with action recognition to enhance the interaction prediction. Experiments on MMHOI and the recently proposed CORE4D datasets demonstrate that our approach achieves state-of-the-art performance in multi-HOI modeling, excelling in both accuracy and reconstruction quality.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 2/10). The page stylesheet hides .paper-details inside
     .collapsed-level-1 cards, leaving the heading row (title link and score badge). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07817v1" target="_blank" rel="noopener noreferrer">
                面向室内全景图像的端到端房间几何约束深度估计框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            An End-to-End Room Geometry Constrained Depth Estimation Framework for Indoor Panorama Images
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kanglin Ning, Ruzhao Chen, Penghong Wang, Xingtao Wang, Ruiqin Xiong, Xiaopeng F...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的深度估计任务，特别是针对室内全景图像，这属于纯粹的视觉处理范畴。虽然深度估计在增强现实和机器人导航中有应用，但与推荐系统、搜索或广告的核心技术缺乏直接关联，也没有涉及LLM、Transformer架构或异构数据建模等焦点领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T05:52:48">2025-10-09 05:52:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07817v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07817v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Predicting spherical pixel depth from monocular $360^{\circ}$ indoor panoramas is critical for many vision applications. However, existing methods focus on pixel-level accuracy, causing oversmoothed room corners and noise sensitivity. In this paper, we propose a depth estimation framework based on room geometry constraints, which extracts room geometry information through layout prediction and integrates those information into the depth estimation process through background segmentation mechanism. At the model level, our framework comprises a shared feature encoder followed by task-specific decoders for layout estimation, depth estimation, and background segmentation. The shared encoder extracts multi-scale features, which are subsequently processed by individual decoders to generate initial predictions: a depth map, a room layout map, and a background segmentation map. Furthermore, our framework incorporates two strategies: a room geometry-based background depth resolving strategy and a background-segmentation-guided fusion mechanism. The proposed room-geometry-based background depth resolving strategy leverages the room layout and the depth decoder's output to generate the corresponding background depth map. Then, a background-segmentation-guided fusion strategy derives fusion weights for the background and coarse depth maps from the segmentation decoder's predictions. Extensive experimental results on the Stanford2D3D, Matterport3D and Structured3D datasets show that our proposed methods can achieve significantly superior performance than current open-source methods. Our code is available at https://github.com/emiyaning/RGCNet.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 2/10). The page stylesheet hides .paper-details inside
     .collapsed-level-1 cards, leaving the heading row (title link and score badge). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07791v1" target="_blank" rel="noopener noreferrer">
                GTR-Bench：评估视觉语言模型中的地理时空推理能力
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            GTR-Bench: Evaluating Geo-Temporal Reasoning in Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qinghongbing Xie, Zhaoyuan Xia, Feng Zhu, Lijun Gong, Ziyue Li, Rui Zhao, Long Z...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然该论文涉及视觉语言模型评估，但其焦点是地理时空推理这一特定能力，与推荐系统、搜索或广告的核心技术关联较弱。地理时空推理在本地化推荐中可能有潜在应用，但论文主要关注评估基准而非直接的技术应用或架构创新。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T05:09:27">2025-10-09 05:09:27</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07791v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07791v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recently spatial-temporal intelligence of Visual-Language Models (VLMs) has attracted much attention due to its importance for Autonomous Driving, Embodied AI and General Artificial Intelligence. Existing spatial-temporal benchmarks mainly focus on egocentric perspective reasoning with images/video context, or geographic perspective reasoning with graphics context (eg. a map), thus fail to assess VLMs' geographic spatial-temporal intelligence with both images/video and graphics context, which is important for areas like traffic management and emergency response. To address the gaps, we introduce Geo-Temporal Reasoning benchmark (GTR-Bench), a novel challenge for geographic temporal reasoning of moving targets in a large-scale camera network. GTR-Bench is more challenging as it requires multiple perspective switches between maps and videos, joint reasoning across multiple videos with non-overlapping fields of view, and inference over spatial-temporal regions that are unobserved by any video context. Evaluations of more than 10 popular VLMs on GTR-Bench demonstrate that even the best proprietary model, Gemini-2.5-Pro (34.9%), significantly lags behind human performance (78.61%) on geo-temporal reasoning. Moreover, our comprehensive analysis on GTR-Bench reveals three primary deficiencies of current models for geo-temporal reasoning. (1) VLMs' reasoning is impaired by an imbalanced utilization of spatial-temporal context. (2) VLMs are weak in temporal forecasting, which leads to worse performance on temporal-emphasized tasks than on spatial-emphasized tasks. (3) VLMs lack the proficiency to comprehend or align the map data with multi-view video inputs. We believe GTR-Bench offers valuable insights and opens up new opportunities for research and applications in spatial-temporal intelligence. Benchmark and code will be released at https://github.com/X-Luffy/GTR-Bench.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 2/10). The page stylesheet hides .paper-details inside
     .collapsed-level-1 cards, leaving the heading row (title link and score badge). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07778v1" target="_blank" rel="noopener noreferrer">
                IntentionVLA：面向人机交互的可泛化高效具身意图推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            IntentionVLA: Generalizable and Efficient Embodied Intention Reasoning for Human-Robot Interaction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yandu Chen, Kefan Gu, Yuqing Wen, Yucheng Zhao, Tiancai Wang, Liqiang Nie
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器人领域的具身意图推理和人机交互，属于特定领域应用。虽然涉及多模态理解和意图建模，但其核心应用场景（机器人交互）与推荐系统、搜索或广告领域缺乏直接关联，且未明确展示在推荐/搜索/广告中的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T04:49:46">2025-10-09 04:49:46</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07778v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07778v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language-Action (VLA) models leverage pretrained vision-language models (VLMs) to couple perception with robotic control, offering a promising path toward general-purpose embodied intelligence. However, current SOTA VLAs are primarily pretrained on multimodal tasks with limited relevance to embodied scenarios, and then finetuned to map explicit instructions to actions. Consequently, due to the lack of reasoning-intensive pretraining and reasoning-guided manipulation, these models are unable to perform implicit human intention reasoning required for complex, real-world interactions. To overcome these limitations, we propose \textbf{IntentionVLA}, a VLA framework with a curriculum training paradigm and an efficient inference mechanism. Our proposed method first leverages carefully designed reasoning data that combine intention inference, spatial grounding, and compact embodied reasoning, endowing the model with both reasoning and perception capabilities. In the following finetuning stage, IntentionVLA employs the compact reasoning outputs as contextual guidance for action generation, enabling fast inference under indirect instructions. Experimental results show that IntentionVLA substantially outperforms $\pi_0$, achieving 18\% higher success rates with direct instructions and 28\% higher than ECoT under intention instructions. On out-of-distribution intention tasks, IntentionVLA achieves over twice the success rate of all baselines, and further enables zero-shot human-robot interaction with 40\% success rate. These results highlight IntentionVLA as a promising paradigm for next-generation human-robot interaction (HRI) systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 2/10). The page stylesheet hides .paper-details inside
     .collapsed-level-1 cards, leaving the heading row (title link and score badge). -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07741v1" target="_blank" rel="noopener noreferrer">
                UltraLED：学习在超高动态范围场景中看到一切
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UltraLED: Learning to See Everything in Ultra-High Dynamic Range Scenes
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuang Meng, Xin Jin, Lina Lei, Chun-Le Guo, Chongyi Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的超高动态范围成像技术，属于纯粹的视觉处理领域。虽然视觉技术在搜索和推荐中有潜在应用（如图像搜索），但论文标题没有表明与推荐系统、搜索或广告的直接关联，也没有涉及LLM或Transformer架构的进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T03:29:39">2025-10-09 03:29:39</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07741v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07741v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Ultra-high dynamic range (UHDR) scenes exhibit significant exposure disparities between bright and dark regions. Such conditions are commonly encountered in nighttime scenes with light sources. Even with standard exposure settings, a bimodal intensity distribution with boundary peaks often emerges, making it difficult to preserve both highlight and shadow details simultaneously. RGB-based bracketing methods can capture details at both ends using short-long exposure pairs, but are susceptible to misalignment and ghosting artifacts. We found that a short-exposure image already retains sufficient highlight detail. The main challenge of UHDR reconstruction lies in denoising and recovering information in dark regions. In comparison to the RGB images, RAW images, thanks to their higher bit depth and more predictable noise characteristics, offer greater potential for addressing this challenge. This raises a key question: can we learn to see everything in UHDR scenes using only a single short-exposure RAW image? In this study, we rely solely on a single short-exposure frame, which inherently avoids ghosting and motion blur, making it particularly robust in dynamic scenes. To achieve that, we introduce UltraLED, a two-stage framework that performs exposure correction via a ratio map to balance dynamic range, followed by a brightness-aware RAW denoiser to enhance detail recovery in dark regions. To support this setting, we design a 9-stop bracketing pipeline to synthesize realistic UHDR images and contribute a corresponding dataset based on diverse scenes, using only the shortest exposure as input for reconstruction. Extensive experiments show that UltraLED significantly outperforms existing single-frame approaches. Our code and dataset are made publicly available at https://srameo.github.io/projects/ultraled.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 1/10). The page stylesheet applies display: none !important
     to .collapsed-level-2, so the whole card is hidden while it carries that class. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07728v1" target="_blank" rel="noopener noreferrer">
                谁窃取了您的数据？一种检测未经授权的RAG窃取的方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Who Stole Your Data? A Method for Detecting Unauthorized RAG Theft
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Peiyang Liu, Ziqiang Cui, Di Liang, Wei Ye
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注数据安全和未经授权访问的检测，这属于隐私和安全领域，被明确列为不相关主题。虽然提到了RAG（检索增强生成），但核心焦点是安全检测而非RAG在推荐系统或搜索中的应用。该研究没有提供在推荐、搜索或广告领域的潜在技术应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T03:09:18">2025-10-09 03:09:18</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07728v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07728v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Retrieval-augmented generation (RAG) enhances Large Language Models (LLMs) by mitigating hallucinations and outdated information issues, yet simultaneously facilitates unauthorized data appropriation at scale. This paper addresses this challenge through two key contributions. First, we introduce RPD, a novel dataset specifically designed for RAG plagiarism detection that encompasses diverse professional domains and writing styles, overcoming limitations in existing resources. Second, we develop a dual-layered watermarking system that embeds protection at both semantic and lexical levels, complemented by an interrogator-detective framework that employs statistical hypothesis testing on accumulated evidence. Extensive experimentation demonstrates our approach's effectiveness across varying query volumes, defense prompts, and retrieval parameters, while maintaining resilience against adversarial evasion techniques. This work establishes a foundational framework for intellectual property protection in retrieval-augmented AI systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 1/10). The page stylesheet applies display: none !important
     to .collapsed-level-2, so the whole card is hidden while it carries that class. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08569v1" target="_blank" rel="noopener noreferrer">
                ArenaBencher：通过多模型竞争性评估实现自动基准演化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ArenaBencher: Automatic Benchmark Evolution via Multi-Model Competitive Evaluation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qin Liu, Jacob Dineen, Yuxi Huang, Sheng Zhang, Hoifung Poon, Ben Zhou, Muhao Ch...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注自动基准评估和演化，属于纯粹的评估基准研究，与我的关注点无关。论文标题表明其核心是基准测试方法学，没有涉及推荐系统、搜索或广告的核心进展，也没有LLM技术或Transformer架构的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T17:59:55">2025-10-09 17:59:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08569v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08569v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Benchmarks are central to measuring the capabilities of large language models and guiding model development, yet widespread data leakage from pretraining corpora undermines their validity. Models can match memorized content rather than demonstrate true generalization, which inflates scores, distorts cross-model comparisons, and misrepresents progress. We introduce ArenaBencher, a model-agnostic framework for automatic benchmark evolution that updates test cases while preserving comparability. Given an existing benchmark and a diverse pool of models to be evaluated, ArenaBencher infers the core ability of each test case, generates candidate question-answer pairs that preserve the original objective, verifies correctness and intent with an LLM as a judge, and aggregates feedback from multiple models to select candidates that expose shared weaknesses. The process runs iteratively with in-context demonstrations that steer generation toward more challenging and diagnostic cases. We apply ArenaBencher to math problem solving, commonsense reasoning, and safety domains and show that it produces verified, diverse, and fair updates that uncover new failure modes, increase difficulty while preserving test objective alignment, and improve model separability. The framework provides a scalable path to continuously evolve benchmarks in step with the rapid progress of foundation models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (score 1/10). The page stylesheet applies display: none !important
     to .collapsed-level-2, so the whole card is hidden while it carries that class. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08511v1" target="_blank" rel="noopener noreferrer">
                AutoMLGen：为编码智能体导航细粒度优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the switch away from the page's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AutoMLGen: Navigating Fine-Grained Optimization for Coding Agents
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shangheng Du, Xiangchao Yan, Dengyang Jiang, Jiakang Yuan, Yusong Hu, Xin Li, Li...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确涉及AutoML（自动机器学习）和编码智能体，这属于明确的无关主题。AutoML被明确列为无关主题，而编码智能体主要涉及代码生成和编程任务，与推荐系统、搜索或广告的核心技术没有直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The data carries no timezone, so datetime is left zone-less. -->
            <time datetime="2025-10-09T17:45:05">2025-10-09 17:45:05</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08511v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08511v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (English text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) have shown impressive performance in general programming tasks. However, in Machine Learning Engineering (MLE) scenarios such as AutoML and Kaggle competitions, achieving high performance depends heavily on expert intervention and repeated adjustments rather than simply generating correct code. When applied directly to these tasks, LLMs often lack fine-grained domain priors, and existing MLE approaches that use linear or tree-structured searches limit knowledge transfer to adjacent hierarchical links. As a result, they cannot leverage past full trajectories or share information across branches, limiting self-evolving ability and search space diversity. To address these limitations, we introduce AutoMLGen, an LLM-based coding agent that integrates a domain knowledge base for high-quality prior guidance and Monte Carlo Graph Search (MCGS) for efficient exploration. MCGS retains the tree-guided exploration of MCTS while embedding a graph structure into the expansion stage to enable dynamic path reorganization, historical trajectory reuse, and multi-solution fusion to support both self-evolution and collaborative learning. Combined with fine-grained operator sets, this design improves stability and accelerates convergence. Evaluation on the MLE-Bench shows that AutoMLGen achieves state-of-the-art performance in numerous dimensions, such as the average medal rate and the valid submission rate, under a 12-hour budget (half the standard runtime). The code is available at https://github.com/Alpha-Innovator/InternAgent.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08482v1" target="_blank" rel="noopener noreferrer">
                视觉象似性挑战：评估视觉语言模型在手语形式-意义映射上的表现
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            The Visual Iconicity Challenge: Evaluating Vision-Language Models on Sign Language Form-Meaning Mapping
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Onur Keleş, Aslı Özyürek, Gerardo Ortega, Kadir Gökgö, Esam Ghaleb
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手语视觉语言模型的评估，属于特定领域的视觉语言应用。虽然涉及视觉语言模型，但手语处理与推荐系统、搜索或广告的核心技术需求相距甚远，没有明显的技术迁移潜力。论文关注的是语言学的形式-意义映射问题，而非能应用于异构数据建模的通用VLM技术。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 17:21:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08482v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08482v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Iconicity, the resemblance between linguistic form and meaning, is pervasive in signed languages, offering a natural testbed for visual grounding. For vision-language models (VLMs), the challenge is to recover such essential mappings from dynamic human motion rather than static context. We introduce the <em>Visual Iconicity Challenge</em>, a novel video-based benchmark that adapts psycholinguistic measures to evaluate VLMs on three tasks: (i) phonological sign-form prediction (e.g., handshape, location), (ii) transparency (inferring meaning from visual form), and (iii) graded iconicity ratings. We assess 13 state-of-the-art VLMs in zero- and few-shot settings on Sign Language of the Netherlands and compare them to human baselines. On <em>phonological form prediction</em>, VLMs recover some handshape and location detail but remain below human performance; on <em>transparency</em>, they are far from human baselines; and only top models correlate moderately with human <em>iconicity ratings</em>. Interestingly, <em>models with stronger phonological form prediction correlate better with human iconicity judgment</em>, indicating shared sensitivity to visually grounded structure. Our findings validate these diagnostic tasks and motivate human-centric signals and embodied learning methods for modelling iconicity and improving visual grounding in multimodal models.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08460v1" target="_blank" rel="noopener noreferrer">
                LeWiDi-2025 在 NLPerspectives：第三届“带分歧学习”（Learning with Disagreements）共享任务
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            LeWiDi-2025 at NLPerspectives: The Third Edition of the Learning with Disagreements Shared Task
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Elisa Leonardelli, Silvia Casola, Siyao Peng, Giulia Rizzi, Valerio Basile, Elis...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确指向NLP领域的共享任务，专注于处理分歧的学习任务，这属于纯粹的NLP评估基准范畴。根据筛选标准，评估基准和纯粹NLP中心主题属于不相关领域，与推荐系统、搜索或广告的核心技术进展没有任何直接关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 17:04:28
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08460v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08460v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Many researchers have reached the conclusion that AI models should be trained to be aware of the possibility of variation and disagreement in human judgments, and evaluated as per their ability to recognize such variation. The LEWIDI series of shared tasks on Learning With Disagreements was established to promote this approach to training and evaluating AI models, by making suitable datasets more accessible and by developing evaluation methods. The third edition of the task builds on this goal by extending the LEWIDI benchmark to four datasets spanning paraphrase identification, irony detection, sarcasm detection, and natural language inference, with labeling schemes that include not only categorical judgments as in previous editions, but ordinal judgments as well. Another novelty is that we adopt two complementary paradigms to evaluate disagreement-aware systems: the soft-label approach, in which models predict population-level distributions of judgments, and the perspectivist approach, in which models predict the interpretations of individual annotators. Crucially, we moved beyond standard metrics such as cross-entropy, and tested new evaluation metrics for the two paradigms. The task attracted diverse participation, and the results provide insights into the strengths and limitations of methods to modeling variation. Together, these contributions strengthen LEWIDI as a framework and provide new resources, benchmarks, and findings to support the development of disagreement-aware technologies.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08365v1" target="_blank" rel="noopener noreferrer">
                基于社交媒体的稳健高效自杀风险检测的两阶段投票方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Two-Stage Voting for Robust and Efficient Suicide Risk Detection on Social Media
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yukai Song, Pengfei Zhou, César Escobar-Viera, Candice Biernesser, Wei Huang, Ji...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于社交媒体上的自杀风险检测，这属于心理健康应用领域，与推荐系统、搜索或广告的核心技术无关。虽然可能涉及文本分析，但论文的焦点是医疗健康检测而非商业应用场景，完全超出了指定的关注范围。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 15:51:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08365v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08365v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Suicide rates have risen worldwide in recent years, underscoring the urgent need for proactive prevention strategies. Social media provides valuable signals, as many at-risk individuals - who often avoid formal help due to stigma - choose instead to share their distress online. Yet detecting implicit suicidal ideation, conveyed indirectly through metaphor, sarcasm, or subtle emotional cues, remains highly challenging. Lightweight models like BERT handle explicit signals but fail on subtle implicit ones, while large language models (LLMs) capture nuance at prohibitive computational cost. To address this gap, we propose a two-stage voting architecture that balances efficiency and robustness. In Stage 1, a lightweight BERT classifier rapidly resolves high-confidence explicit cases. In Stage 2, ambiguous inputs are escalated to either (i) a multi-perspective LLM voting framework to maximize recall on implicit ideation, or (ii) a feature-based ML ensemble guided by psychologically grounded indicators extracted via prompt-engineered LLMs for efficiency and interpretability. To the best of our knowledge, this is among the first works to operationalize LLM-extracted psychological features as structured vectors for suicide risk detection. On two complementary datasets - explicit-dominant Reddit and implicit-only DeepSuiMind - our framework outperforms single-model baselines, achieving 98.0% F1 on explicit cases, 99.7% on implicit ones, and reducing the cross-domain gap below 2%, while significantly lowering LLM cost.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08240v1" target="_blank" rel="noopener noreferrer">
                对齐华尔兹：联合训练智能体以实现安全协作
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            The Alignment Waltz: Jointly Training Agents to Collaborate for Safety
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Jingyu Zhang, Haozhu Wang, Eric Michael Smith, Sid Wang, Amr Sharaf, Mahesh Pasu...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于多智能体协作中的安全对齐问题，属于强化学习安全领域。虽然涉及多智能体训练，但论文主题明确围绕安全协作，与搜索、推荐、广告等核心领域没有直接关联，也不涉及LLM技术、Transformer架构或异构数据建模等关注方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 14:03:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08240v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08240v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Harnessing the power of LLMs requires a delicate dance between being helpful and harmless. This creates a fundamental tension between two competing challenges: vulnerability to adversarial attacks that elicit unsafe content, and a tendency for overrefusal on benign but sensitive prompts. Current approaches often navigate this dance with safeguard models that completely reject any content that contains unsafe portions. This approach cuts the music entirely—it may exacerbate overrefusals and fails to provide nuanced guidance for queries it refuses. To teach models a more coordinated choreography, we propose WaltzRL, a novel multi-agent reinforcement learning framework that formulates safety alignment as a collaborative, positive-sum game. WaltzRL jointly trains a conversation agent and a feedback agent, where the latter is incentivized to provide useful suggestions that improve the safety and helpfulness of the conversation agent's responses. At the core of WaltzRL is a Dynamic Improvement Reward (DIR) that evolves over time based on how well the conversation agent incorporates the feedback. At inference time, unsafe or overrefusing responses from the conversation agent are improved rather than discarded. The feedback agent is deployed together with the conversation agent and only engages adaptively when needed, preserving helpfulness and low latency on safe queries. Our experiments, conducted across five diverse datasets, demonstrate that WaltzRL significantly reduces both unsafe responses (e.g., from 39.0% to 4.6% on WildJailbreak) and overrefusals (from 45.3% to 9.9% on OR-Bench) compared to various baselines. By enabling the conversation and feedback agents to co-evolve and adaptively apply feedback, WaltzRL enhances LLM safety without degrading general capabilities, thereby advancing the Pareto front between helpfulness and harmlessness.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08214v1" target="_blank" rel="noopener noreferrer">
                SenWave：一个基于COVID-19推文的细粒度多语言情感分析数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            SenWave: A Fine-Grained Multi-Language Sentiment Analysis Dataset Sourced from COVID-19 Tweets
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Qiang Yang, Xiuying Chen, Changsheng Ma, Rui Yin, Xin Gao, Xiangliang Zhang
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于特定领域（COVID-19）的情感分析数据集构建，属于纯粹的NLP数据资源工作。虽然情感分析在理论上可能用于推荐系统或广告的用户反馈分析，但该论文的领域特定性和数据集性质使其与当前关注的推荐系统、搜索广告核心算法进展、Transformer架构改进或LLM直接应用等焦点领域几乎没有关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 13:38:05
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08214v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08214v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The global impact of the COVID-19 pandemic has highlighted the need for a comprehensive understanding of public sentiment and reactions. Despite the availability of numerous public datasets on COVID-19, some reaching volumes of up to 100 billion data points, challenges persist regarding the availability of labeled data and the presence of coarse-grained or inappropriate sentiment labels. In this paper, we introduce SenWave, a novel fine-grained multi-language sentiment analysis dataset specifically designed for analyzing COVID-19 tweets, featuring ten sentiment categories across five languages. The dataset comprises 10,000 annotated tweets each in English and Arabic, along with 30,000 translated tweets in Spanish, French, and Italian, derived from English tweets. Additionally, it includes over 105 million unlabeled tweets collected during various COVID-19 waves. To enable accurate fine-grained sentiment classification, we fine-tuned pre-trained transformer-based language models using the labeled tweets. Our study provides an in-depth analysis of the evolving emotional landscape across languages, countries, and topics, revealing significant insights over time. Furthermore, we assess the compatibility of our dataset with ChatGPT, demonstrating its robustness and versatility in various applications. Our dataset and accompanying code are publicly accessible on the repository (https://github.com/gitdevqiang/SenWave). We anticipate that this work will foster further exploration into fine-grained sentiment analysis for complex events within the NLP community, promoting more nuanced understanding and research innovations.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08202v1" target="_blank" rel="noopener noreferrer">
                情感至关重要：200次人与共享自动驾驶汽车（SAV）交互的分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Sentiment Matters: An Analysis of 200 Human-SAV Interactions
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Lirui Guo, Michael G. Burke, Wynita M. Griggs
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于人机交互中的情感分析，属于纯粹的HCI/心理学研究范畴，与推荐系统、搜索或广告的核心技术进展、LLM赋能技术或Transformer架构改进均无直接关联。即使考虑情感分析在内容理解中的应用，该研究明显侧重交互分析而非技术方法创新，缺乏对RecSys/Search/Ads领域的直接技术贡献。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 13:30:23
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08202v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08202v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.HC</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.ET</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Shared Autonomous Vehicles (SAVs) are likely to become an important part of the transportation system, making effective human-SAV interactions an important area of research. This paper introduces a dataset of 200 human-SAV interactions to further this area of study. We present an open-source human-SAV conversational dataset, comprising both textual data (e.g., 2,136 human-SAV exchanges) and empirical data (e.g., post-interaction survey results on a range of psychological factors). The dataset's utility is demonstrated through two benchmark case studies: First, using random forest modeling and chord diagrams, we identify key predictors of SAV acceptance and perceived service quality, highlighting the critical influence of response sentiment polarity (i.e., perceived positivity). Second, we benchmark the performance of an LLM-based sentiment analysis tool against the traditional lexicon-based TextBlob method. Results indicate that even simple zero-shot LLM prompts more closely align with user-reported sentiment, though limitations remain. This study provides novel insights for designing conversational SAV interfaces and establishes a foundation for further exploration into advanced sentiment modeling, adaptive user interactions, and multimodal conversational systems.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08191v1" target="_blank" rel="noopener noreferrer">
                免训练组相对策略优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Training-Free Group Relative Policy Optimization
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Yuzheng Cai, Siqi Cai, Yuchen Shi, Zihan Xu, Lichao Chen, Yulei Qin, Xiaoyu Tan,...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了强化学习中的策略优化方法，但未提及与推荐系统、搜索或广告的任何关联。免训练方法可能涉及RL技术，但根据用户明确的排除标准，没有明确相关性的强化学习论文应被视为不相关。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 13:18:17
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08191v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08191v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Recent advances in Large Language Model (LLM) agents have demonstrated their promising general capabilities. However, their performance in specialized real-world domains often degrades due to challenges in effectively integrating external tools and specific prompting strategies. While methods like agentic reinforcement learning have been proposed to address this, they typically rely on costly parameter updates, for example, through a process that uses Supervised Fine-Tuning (SFT) followed by a Reinforcement Learning (RL) phase with Group Relative Policy Optimization (GRPO) to alter the output distribution. However, we argue that LLMs can achieve a similar effect on the output distribution by learning experiential knowledge as a token prior, which is a far more lightweight approach that not only addresses practical data scarcity but also avoids the common issue of overfitting. To this end, we propose Training-Free Group Relative Policy Optimization (Training-Free GRPO), a cost-effective solution that enhances LLM agent performance without any parameter updates. Our method leverages the group relative semantic advantage instead of numerical ones within each group of rollouts, iteratively distilling high-quality experiential knowledge during multi-epoch learning on a minimal ground-truth data. Such knowledge serves as the learned token prior, which is seamlessly integrated during LLM API calls to guide model behavior. Experiments on mathematical reasoning and web searching tasks demonstrate that Training-Free GRPO, when applied to DeepSeek-V3.1-Terminus, significantly improves out-of-domain performance. With just a few dozen training samples, Training-Free GRPO outperforms fine-tuned small LLMs with marginal training data and cost.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Simple paper card template (title link, score badge, details, abstract); this header is emitted before every rendered card.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08188v1" target="_blank" rel="noopener noreferrer">
                METRICALARGS：一种使用大语言模型研究格律诗歌的分类法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            METRICALARGS: A Taxonomy for Studying Metrical Poetry with LLMs
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Chalamalasetti Kranti, Sowmya Vajjala
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于诗歌格律分析这一纯文学领域，与推荐系统、搜索或广告的核心技术进展完全无关。虽然提到了LLMs，但应用场景是诗歌研究这种非商业领域，不属于任何相关技术范畴。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 13:14:38
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.08188v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.08188v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Prior NLP work studying poetry has focused primarily on automatic poem generation and summarization. Many languages have well-studied traditions of poetic meter which enforce constraints on a poem in terms of syllable and phoneme patterns. Such advanced literary forms offer opportunities for probing deeper reasoning and language understanding in Large Language Models (LLMs) and their ability to follow strict pre-requisites and rules. In this paper, we introduce MetricalARGS, the first taxonomy of poetry-related NLP tasks designed to evaluate LLMs on metrical poetry across four dimensions: Analysis, Retrieval, Generation, and Support. We discuss how these tasks relate to existing NLP tasks, addressing questions around datasets and evaluation metrics. Taking Telugu as our example language, we illustrate how the taxonomy can be used in practice. MetricalARGS highlights the broader possibilities for understanding the capabilities and limitations of today's LLMs through the lens of metrical poetry.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08158v1" target="_blank" rel="noopener noreferrer">
                超越过度拒绝：基于场景的诊断与事后缓解方法应对大语言模型中的夸大拒绝行为
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond Over-Refusal: Scenario-Based Diagnostics and Post-Hoc Mitigation for Exaggerated Refusals in LLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuzhou Yuan, Ercong Nie, Yinuo Sun, Chenxuan Zhao, William LaCroix, Michael Fär...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的拒绝行为诊断和缓解，属于纯粹的NLP评估和安全性话题。虽然涉及LLM技术，但核心关注的是模型拒绝行为的评估和修正，与推荐系统、搜索或广告中的排序、匹配、个性化等核心任务没有直接关联，也不涉及Transformer架构改进或异构数据建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 12:38:16
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08158v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08158v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large language models (LLMs) frequently produce false refusals, declining benign requests that contain terms resembling unsafe queries. We address this challenge by introducing two comprehensive benchmarks: the Exaggerated Safety Benchmark (XSB) for single-turn prompts, annotated with "Focus" keywords that identify refusal-inducing triggers, and the Multi-turn Scenario-based Exaggerated Safety Benchmark (MS-XSB), which systematically evaluates refusal calibration in realistic, context-rich dialog settings. Our benchmarks reveal that exaggerated refusals persist across diverse recent LLMs and are especially pronounced in complex, multi-turn scenarios. To mitigate these failures, we leverage post-hoc explanation methods to identify refusal triggers and deploy three lightweight, model-agnostic approaches, ignore-word instructions, prompt rephrasing, and attention steering, at inference time, all without retraining or parameter access. Experiments on four instruction-tuned Llama models demonstrate that these strategies substantially improve compliance on safe prompts while maintaining robust safety protections. Our findings establish a reproducible framework for diagnosing and mitigating exaggerated refusals, highlighting practical pathways to safer and more helpful LLM deployments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08114v1" target="_blank" rel="noopener noreferrer">
                敢于承担风险的AI助手能否恰当地代表实体
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Can Risk-taking AI-Assistants suitably represent entities
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ali Mazyaki, Mohammad Naghizadeh, Samaneh Ranjkhah Zonouzaghi, Amirhossein Farsh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于AI助手风险承担能力和实体代表性问题，这属于AI伦理、安全性和代理行为的范畴，与推荐系统、搜索或广告的核心技术进展完全无关。标题中没有任何技术元素表明与Transformer架构、LLM效率、推荐算法或多模态建模相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 11:55:31
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08114v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08114v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Responsible AI demands systems whose behavioral tendencies can be effectively measured, audited, and adjusted to prevent inadvertently nudging users toward risky decisions or embedding hidden biases in risk aversion. As language models (LMs) are increasingly incorporated into AI-driven decision support systems, understanding their risk behaviors is crucial for their responsible deployment. This study investigates the manipulability of risk aversion (MoRA) in LMs, examining their ability to replicate human risk preferences across diverse economic scenarios, with a focus on gender-specific attitudes, uncertainty, role-based decision-making, and the manipulability of risk aversion. The results indicate that while LMs such as DeepSeek Reasoner and Gemini-2.0-flash-lite exhibit some alignment with human behaviors, notable discrepancies highlight the need to refine bio-centric measures of manipulability. These findings suggest directions for refining AI design to better align human and AI risk preferences and enhance ethical decision-making. The study calls for further advancements in model design to ensure that AI systems more accurately replicate human risk preferences, thereby improving their effectiveness in risk management contexts. This approach could enhance the applicability of AI assistants in managing risk.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08058v1" target="_blank" rel="noopener noreferrer">
                FedDTRE：基于可信度评估驱动的联邦对话生成模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FedDTRE: Federated Dialogue Generation Models Powered by Trustworthiness Evaluation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shule Lu, Lingxiang Wang, Sijia Wen, Ziwei Wang, Hainan Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及联邦学习和对话生成，这两个主题均被明确列为不相关主题。联邦学习属于隐私/安全范畴，而对话生成属于纯粹的LLM中心化应用，与推荐系统、搜索或广告的核心技术进展没有直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 10:43:14
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08058v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08058v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                With the rapid development of artificial intelligence, dialogue systems have become a prominent form of human-computer interaction. However, traditional centralized or fully local training approaches face challenges in balancing privacy preservation and personalization due to data privacy concerns and heterogeneous device capabilities. Federated learning, as a representative distributed paradigm, offers a promising solution. However, existing methods often suffer from overfitting under limited client data and tend to forget global information after multiple training rounds, leading to poor generalization. To address these issues, we propose FedDTRE, a Federated adaptive aggregation strategy for Dialogue generation based on Trustworthiness Evaluation. Instead of directly replacing local models with the global model, FedDTRE leverages trustworthiness scores of both global and local models on a fairness-oriented evaluation dataset to dynamically regulate the global model's contribution during local updates. Experimental results demonstrate that FedDTRE can improve dialogue model performance and enhance the quality of dialogue generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08047v1" target="_blank" rel="noopener noreferrer">
                Pseudo2Real：自动语音识别中伪标签校正的任务算术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Pseudo2Real: Task Arithmetic for Pseudo-Label Correction in Automatic Speech Recognition
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi-Cheng Lin, Yu-Hsuan Li Liang, Hsuan Su, Tzu-Quan Lin, Shang-Tse Chen, Yun-Nun...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动语音识别中的伪标签校正技术，属于纯粹的语音处理领域。虽然提到了任务算术的概念，但核心应用场景是语音识别，与推荐系统、搜索或广告领域没有直接关联，也不涉及LLM在RecSys/Search/Ads中的潜在应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 10:31:47
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08047v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08047v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">eess.AS</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Robust ASR under domain shift is crucial because real-world systems encounter unseen accents and domains with limited labeled data. Although pseudo-labeling offers a practical workaround, it often introduces systematic, accent-specific errors that filtering fails to fix. We ask: How can we correct these recurring biases without target ground truth? We propose a simple parameter-space correction: in a source domain containing both real and pseudo-labeled data, two ASR models are fine-tuned from the same initialization, one on ground-truth labels and the other on pseudo-labels, and their weight difference forms a correction vector that captures pseudo-label biases. When applied to a pseudo-labeled target model, this vector enhances recognition, achieving up to a 35% relative Word Error Rate (WER) reduction on AfriSpeech-200 across ten African accents with the Whisper tiny model.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08043v1" target="_blank" rel="noopener noreferrer">
                大型语言模型中的气候知识
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Climate Knowledge in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ivan Kuznetsov, Jacopo Grassi, Dmitrii Pantiukhin, Boris Shapkin, Thomas Jung, N...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于LLMs中的特定领域知识（气候），这属于纯粹的LLM知识评估范畴，与推荐系统、搜索或广告的核心技术进展无关。没有证据表明该研究涉及推荐系统架构、搜索算法改进、广告排名或任何与我的关注领域相关的技术应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 10:25:36
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08043v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08043v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span><span class="category-tag">physics.ao-ph</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- LaTeX residue from the source abstract ({\deg}, $\pm$) is written as literal ° and ± so it renders correctly. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large language models (LLMs) are increasingly deployed for climate-related applications, where understanding internal climatological knowledge is crucial for reliability and misinformation risk assessment. Despite growing adoption, the capacity of LLMs to recall climate normals from parametric knowledge remains largely uncharacterized. We investigate the capacity of contemporary LLMs to recall climate normals without external retrieval, focusing on a prototypical query: mean July 2-m air temperature 1991-2020 at specified locations. We construct a global grid of queries at 1° resolution land points, providing coordinates and location descriptors, and validate responses against ERA5 reanalysis. Results show that LLMs encode non-trivial climate structure, capturing latitudinal and topographic patterns, with root-mean-square errors of 3-6 °C and biases of ±1 °C. However, spatially coherent errors remain, particularly in mountains and high latitudes. Performance degrades sharply above 1500 m, where RMSE reaches 5-13 °C compared to 2-4 °C at lower elevations. We find that including geographic context (country, city, region) reduces errors by 27% on average, with larger models being most sensitive to location descriptors. While models capture the global mean magnitude of observed warming between 1950-1974 and 2000-2024, they fail to reproduce spatial patterns of temperature change, which directly relate to assessing climate change. This limitation highlights that while LLMs may capture present-day climate distributions, they struggle to represent the regional and local expression of long-term shifts in temperature essential for understanding climate dynamics. Our evaluation framework provides a reproducible benchmark for quantifying parametric climate knowledge in LLMs and complements existing climate communication assessments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08042v1" target="_blank" rel="noopener noreferrer">
                ChatGPT作为翻译引擎：日语-英语案例研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ChatGPT as a Translation Engine: A Case Study on Japanese-English
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Vincent Michael Sutanto, Giovanni Gatti De Giacomo, Toshiaki Nakazawa, Masaru Ya...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文纯粹研究ChatGPT在机器翻译领域的应用，属于NLP特定任务研究，与推荐系统、搜索或广告的核心技术发展没有直接关联。论文标题明确聚焦于翻译引擎功能，没有涉及任何推荐、搜索排名、广告投放或Transformer架构改进等关键技术方向。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 10:25:10
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08042v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08042v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                This study investigates ChatGPT for Japanese-English translation, exploring simple and enhanced prompts and comparing against commercially available translation engines. Performing both automatic and MQM-based human evaluations, we found that document-level translation outperforms sentence-level translation for ChatGPT. On the other hand, we were not able to determine if enhanced prompts performed better than simple prompts in our experiments. We also discovered that ChatGPT-3.5 was preferred by automatic evaluation, but a tradeoff exists between accuracy (ChatGPT-3.5) and fluency (ChatGPT-4). Lastly, ChatGPT yields competitive results against two widely-known translation systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07978v1" target="_blank" rel="noopener noreferrer">
                VoiceAgentBench：语音助手是否已准备好执行智能体任务？
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            VoiceAgentBench: Are Voice Assistants ready for agentic tasks?
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dhruv Jain, Harshit Shukla, Gautam Rajeev, Ashish Kulkarni, Chandra Khatri, Shub...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题聚焦于语音助手的智能体能力评估，属于语音交互和智能体评估领域，与推荐系统、搜索或广告的核心技术无直接关联。语音助手虽然涉及用户交互，但论文关注的是智能体任务准备度评估，而非推荐、搜索或广告中的实际应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 09:11:38
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07978v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07978v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large-scale Speech Language Models (SpeechLMs) have enabled voice assistants capable of understanding natural spoken queries and performing complex tasks. However, existing speech benchmarks primarily focus on isolated capabilities such as transcription, or question-answering, and do not systematically evaluate agentic scenarios encompassing multilingual and cultural understanding, as well as adversarial robustness. To address this, we introduce VoiceAgentBench, a comprehensive benchmark designed to evaluate SpeechLMs in realistic spoken agentic settings. It comprises over 5,500 synthetic spoken queries, including dialogues grounded in Indian context, covering single-tool invocations, multi-tool workflows, multi-turn interactions, and safety evaluations. The benchmark supports English, Hindi, and 5 other Indian languages, reflecting real-world linguistic and cultural diversity. We simulate speaker variability using a novel sampling algorithm that selects audios for TTS voice conversion based on its speaker embeddings, maximizing acoustic and speaker diversity. Our evaluation measures tool selection accuracy, structural consistency, and the correctness of tool invocations, including adversarial robustness. Our experiments reveal significant gaps in contextual tool orchestration tasks, Indic generalization, and adversarial robustness, exposing critical limitations of current SpeechLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Simple paper card; hidden while the page's `.collapsed-level-2 { display: none }` rule applies. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07940v1" target="_blank" rel="noopener noreferrer">
                TTOM：组合式视频生成的测试时优化与记忆机制
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon-font glyphs are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the page's inline styles hide `.paper-details` under a `.collapsed-level-1` ancestor. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            TTOM: Test-Time Optimization and Memorization for Compositional Video Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Leigang Qu, Ziyang Wang, Na Zheng, Wenjie Wang, Liqiang Nie, Tat-Seng Chua
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成领域，属于纯粹的视觉内容生成任务，与推荐系统、搜索或广告的核心技术无关。论文标题明确指向视频生成中的组合优化问题，没有显示出任何在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-09 08:37:00
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.07940v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07940v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract (a sibling of the metadata row). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Video Foundation Models (VFMs) exhibit remarkable visual generation performance, but struggle in compositional scenarios (e.g., motion, numeracy, and spatial relation). In this work, we introduce Test-Time Optimization and Memorization (TTOM), a training-free framework that aligns VFM outputs with spatiotemporal layouts during inference for better text-image alignment. Rather than direct intervention to latents or attention per-sample in existing work, we integrate and optimize new parameters guided by a general layout-attention objective. Furthermore, we formulate video generation within a streaming setting, and maintain historical optimization contexts with a parametric memory mechanism that supports flexible operations, such as insert, read, update, and delete. Notably, we found that TTOM disentangles compositional world knowledge, showing powerful transferability and generalization. Experimental results on the T2V-CompBench and Vbench benchmarks establish TTOM as an effective, practical, scalable, and efficient framework to achieve cross-modal alignment for compositional video generation on the fly.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07931v1" target="_blank" rel="noopener noreferrer">
                基于视觉能力的LLM在历史词典学中的应用：数字化和丰富17-18世纪爱沙尼亚-德语词典
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Vision-Enabled LLMs in Historical Lexicography: Digitising and Enriching Estonian-German Dictionaries from the 17th and 18th Centuries
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Madis Jürviste, Joonatan Jakobson
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要涉及历史文档的数字保存和语言学研究，属于特定领域应用。虽然提到了视觉能力的LLM，但其应用场景（历史词典学）与推荐系统、搜索或广告的核心技术发展没有直接关联，也不涉及Transformer架构改进或异构数据处理等关键技术方向。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T08:29:22">2025-10-09 08:29:22</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07931v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07931v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This article presents research conducted at the Institute of the Estonian Language between 2022 and 2025 on the application of large language models (LLMs) to the study of 17th and 18th century Estonian dictionaries. The authors address three main areas: enriching historical dictionaries with modern word forms and meanings; using vision-enabled LLMs to perform text recognition on sources printed in Gothic script (Fraktur); and preparing for the creation of a unified, cross-source dataset. Initial experiments with J. Gutslaff's 1648 dictionary indicate that LLMs have significant potential for semi-automatic enrichment of dictionary information. When provided with sufficient context, Claude 3.7 Sonnet accurately provided meanings and modern equivalents for 81% of headword entries. In a text recognition experiment with A. T. Helle's 1732 dictionary, a zero-shot method successfully identified and structured 41% of headword entries into error-free JSON-formatted output. For digitising the Estonian-German dictionary section of A. W. Hupel's 1780 grammar, overlapping tiling of scanned image files is employed, with one LLM being used for text recognition and a second for merging the structured output. These findings demonstrate that even for minor languages LLMs have a significant potential for saving time and financial resources.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07835v1" target="_blank" rel="noopener noreferrer">
                MetaDefense：在生成前后防御基于微调的越狱攻击
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MetaDefense: Defending Finetuning-based Jailbreak Attack Before and During Generation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Weisen Jiang, Sinno Jialin Pan
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于LLM安全防御技术，特别是对抗越狱攻击的方法，这属于安全/隐私范畴，被明确列为无关主题。虽然涉及LLM技术，但其核心关注点是防御机制而非推荐系统、搜索或广告的应用潜力，与当前关注的领域进展、使能技术或直接应用无关。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T06:27:34">2025-10-09 06:27:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07835v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07835v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CR</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper introduces MetaDefense, a novel framework for defending against finetuning-based jailbreak attacks in large language models (LLMs). We observe that existing defense mechanisms fail to generalize to harmful queries disguised by unseen attack templates, despite LLMs being capable of distinguishing disguised harmful queries in the embedding space. Based on these insights, we propose a two-stage defense approach: (i) pre-generation defense that detects harmful queries before response generation begins, and (ii) mid-generation defense that monitors partial responses during generation to prevent outputting more harmful content. Our MetaDefense trains the LLM to predict the harmfulness of both queries and partial responses using specialized prompts, enabling early termination of potentially harmful interactions. Extensive experiments across multiple LLM architectures (LLaMA-2-7B, Qwen-2.5-3B-Instruct, and LLaMA-3.2-3B-Instruct) demonstrate that MetaDefense significantly outperforms existing defense mechanisms, achieving robust defense against harmful queries with seen and unseen attack templates while maintaining competitive performance on benign tasks. Code is available at https://github.com/ws-jiang/MetaDefense.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07793v1" target="_blank" rel="noopener noreferrer">
                LLM4Cell：面向单细胞生物学的大语言与智能体模型综述
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LLM4Cell: A Survey of Large Language and Agentic Models for Single-Cell Biology
        </div>

        <!-- Authors (list is already truncated with "..." in the generated data) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sajib Acharjee Dip, Adrika Zafor, Bikash Kumar Paul, Uddip Acharjee Shuvo, Muhit...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于单细胞生物学这一生物医学领域，属于明确的无关主题范畴。标题中虽提及大语言模型，但其应用场景完全限定在生物学领域，与推荐系统、搜索或广告没有任何关联。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T05:12:09">2025-10-09 05:12:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07793v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07793v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) and emerging agentic frameworks are beginning to transform single-cell biology by enabling natural-language reasoning, generative annotation, and multimodal data integration. However, progress remains fragmented across data modalities, architectures, and evaluation standards. LLM4Cell presents the first unified survey of 58 foundation and agentic models developed for single-cell research, spanning RNA, ATAC, multi-omic, and spatial modalities. We categorize these methods into five families-foundation, text-bridge, spatial, multimodal, epigenomic, and agentic-and map them to eight key analytical tasks including annotation, trajectory and perturbation modeling, and drug-response prediction. Drawing on over 40 public datasets, we analyze benchmark suitability, data diversity, and ethical or scalability constraints, and evaluate models across 10 domain dimensions covering biological grounding, multi-omics alignment, fairness, privacy, and explainability. By linking datasets, models, and evaluation domains, LLM4Cell provides the first integrated view of language-driven single-cell intelligence and outlines open challenges in interpretability, standardization, and trustworthy model development.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07775v1" target="_blank" rel="noopener noreferrer">
                AI对齐的意外权衡：在大型语言模型中平衡幻觉缓解与安全性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title, kept verbatim; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            The Unintended Trade-off of AI Alignment:Balancing Hallucination Mitigation and Safety in LLMs
        </div>

        <!-- Authors (list is already truncated with "..." in the generated data) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Omar Mahmoud, Ali Khalil, Buddhika Laknath Semage, Thommen George Karimpanal, Sa...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的对齐、幻觉缓解和安全性问题，这些都属于纯粹的NLP中心话题，与推荐系统、搜索或广告的核心技术进展无关。虽然提到了幻觉缓解，但这属于被明确排除的无关主题范畴，没有展示出在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T04:30:58">2025-10-09 04:30:58</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07775v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07775v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Hallucination in large language models (LLMs) has been widely studied in recent years, with progress in both detection and mitigation aimed at improving truthfulness. Yet, a critical side effect remains largely overlooked: enhancing truthfulness can negatively impact safety alignment. In this paper, we investigate this trade-off and show that increasing factual accuracy often comes at the cost of weakened refusal behavior. Our analysis reveals that this arises from overlapping components in the model that simultaneously encode hallucination and refusal information, leading alignment methods to suppress factual knowledge unintentionally. We further examine how fine-tuning on benign datasets, even when curated for safety, can degrade alignment for the same reason. To address this, we propose a method that disentangles refusal-related features from hallucination features using sparse autoencoders, and preserves refusal behavior during fine-tuning through subspace orthogonalization. This approach prevents hallucinations from increasing while maintaining safety alignment. We evaluate our method on commonsense reasoning tasks and harmful benchmarks (AdvBench and StrongReject). Results demonstrate that our approach preserves refusal behavior and task utility, mitigating the trade-off between truthfulness and safety.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07731v1" target="_blank" rel="noopener noreferrer">
                oMeBench：面向有机机理阐明与推理中大型语言模型鲁棒性基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            oMeBench: Towards Robust Benchmarking of LLMs in Organic Mechanism Elucidation and Reasoning
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruiling Xu, Yifan Zhang, Qingyun Wang, Carl Edwards, Heng Ji
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于有机化学机理领域的基准测试，属于化学领域特定应用，与推荐系统、搜索或广告完全无关。论文主题涉及有机化学机理推理，属于明确排除的医学/生物/化学等特定领域应用范畴。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T03:13:31">2025-10-09 03:13:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07731v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07731v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Organic reaction mechanisms are the stepwise elementary reactions by which reactants form intermediates and products, and are fundamental to understanding chemical reactivity and designing new molecules and reactions. Although large language models (LLMs) have shown promise in understanding chemical tasks such as synthesis design, it is unclear to what extent this reflects genuine chemical reasoning capabilities, i.e., the ability to generate valid intermediates, maintain chemical consistency, and follow logically coherent multi-step pathways. We address this by introducing oMeBench, the first large-scale, expert-curated benchmark for organic mechanism reasoning in organic chemistry. It comprises over 10,000 annotated mechanistic steps with intermediates, type labels, and difficulty ratings. Furthermore, to evaluate LLM capability more precisely and enable fine-grained scoring, we propose oMeS, a dynamic evaluation framework that combines step-level logic and chemical similarity. We analyze the performance of state-of-the-art LLMs, and our results show that although current models display promising chemical intuition, they struggle with correct and consistent multi-step reasoning. Notably, we find that using prompting strategy and fine-tuning a specialist model on our proposed dataset increases performance by 50% over the leading closed-source model. We hope that oMeBench will serve as a rigorous foundation for advancing AI systems toward genuine chemical reasoning.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08571v1" target="_blank" rel="noopener noreferrer">
                自动驾驶的可扩展离线评估指标
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Scalable Offline Metrics for Autonomous Driving
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Animikh Aich, Adwait Kulkarni, Eshed Ohn-Bar
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域的离线评估指标，属于特定领域应用（自动驾驶），与推荐系统、搜索或广告的核心技术进展、LLM技术或Transformer架构改进均无直接关联。自动驾驶的评估指标无法直接应用于RecSys/Search/Ads领域，也不涉及任何相关的技术迁移潜力。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:57">2025-10-09 17:59:57</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08571v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08571v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Real-World evaluation of perception-based planning models for robotic systems, such as autonomous vehicles, can be safely and inexpensively conducted offline, i.e., by computing model prediction error over a pre-collected validation dataset with ground-truth annotations. However, extrapolating from offline model performance to online settings remains a challenge. In these settings, seemingly minor errors can compound and result in test-time infractions or collisions. This relationship is understudied, particularly across diverse closed-loop metrics and complex urban maneuvers. In this work, we revisit this undervalued question in policy evaluation through an extensive set of experiments across diverse conditions and metrics. Based on analysis in simulation, we find an even worse correlation between offline and online settings than reported by prior studies, casting doubts on the validity of current evaluation practices and metrics for driving policies. Next, we bridge the gap between offline and online evaluation. We investigate an offline metric based on epistemic uncertainty, which aims to capture events that are likely to cause errors in closed-loop settings. The resulting metric achieves over 13% improvement in correlation compared to previous offline metrics. We further validate the generalization of our findings beyond the simulation environment in real-world settings, where even greater gains are observed.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08566v1" target="_blank" rel="noopener noreferrer">
                D²GS：基于深度和密度引导的高斯泼溅实现稳定且准确的稀疏视图重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page.
             NOTE(review): the title carries raw LaTeX ($^2$); confirm a math renderer is loaded, otherwise it displays literally. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            D$^2$GS: Depth-and-Density Guided Gaussian Splatting for Stable and Accurate Sparse-View Reconstruction
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Meixi Song, Xin Lin, Dizhe Zhang, Haodong Li, Xiangtai Li, Bo Du, Lu Qi
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的3D重建技术，特别是高斯泼溅和稀疏视图重建。虽然标题提到深度和密度引导，但这属于纯粹的3D视觉领域，与推荐系统、搜索或广告的核心技术没有直接关联。该技术主要应用于场景重建和图形学，没有明显的推荐、搜索或广告应用潜力。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:49">2025-10-09 17:59:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08566v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08566v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent advances in 3D Gaussian Splatting (3DGS) enable real-time, high-fidelity novel view synthesis (NVS) with explicit 3D representations. However, performance degradation and instability remain significant under sparse-view conditions. In this work, we identify two key failure modes under sparse-view conditions: overfitting in regions with excessive Gaussian density near the camera, and underfitting in distant areas with insufficient Gaussian coverage. To address these challenges, we propose a unified framework D$^2$GS, comprising two key components: a Depth-and-Density Guided Dropout strategy that suppresses overfitting by adaptively masking redundant Gaussians based on density and depth, and a Distance-Aware Fidelity Enhancement module that improves reconstruction quality in under-fitted far-field areas through targeted supervision. Moreover, we introduce a new evaluation metric to quantify the stability of learned Gaussian distributions, providing insights into the robustness of the sparse-view 3DGS. Extensive experiments on multiple datasets demonstrate that our method significantly improves both visual quality and robustness under sparse view conditions. The project page can be found at: https://insta360-research-team.github.io/DDGS-website/.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and score badge.
         The card stays hidden while it carries .collapsed-level-2 (display: none in the page styles). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08562v1" target="_blank" rel="noopener noreferrer">
                ResAD：用于端到端自动驾驶的归一化残差轨迹建模
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving
        </div>

        <!-- Authors (list is already truncated with "..." in the generated data) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhiyu Zheng, Shaoyu Chen, Haoran Yin, Xinbang Zhang, Jialv Zou, Xinggang Wang, Q...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域，与搜索、推荐或广告系统没有直接关联。自动驾驶属于计算机视觉和机器人技术领域，不在当前关注的RecSys/Search/Ads核心领域或相关使能技术范围内。</p>
        </div>

        <!-- Meta row: timestamp | arXiv link | category tags; icons and separators are decorative -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:36">2025-10-09 17:59:36</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08562v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08562v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Abstract, collapsed by default via the native details element -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                End-to-end autonomous driving (E2EAD) systems, which learn to predict future trajectories directly from sensor data, are fundamentally challenged by the inherent spatio-temporal imbalance of trajectory data. This imbalance creates a significant optimization burden, causing models to learn spurious correlations instead of causal inference, while also prioritizing uncertain, distant predictions, thereby compromising immediate safety. To address these issues, we propose ResAD, a novel Normalized Residual Trajectory Modeling framework. Instead of predicting the future trajectory directly, our approach reframes the learning task to predict the residual deviation from a deterministic inertial reference. The inertial reference serves as a counterfactual, forcing the model to move beyond simple pattern recognition and instead identify the underlying causal factors (e.g., traffic rules, obstacles) that necessitate deviations from a default, inertially-guided path. To deal with the optimization imbalance caused by uncertain, long-term horizons, ResAD further incorporates Point-wise Normalization of the predicted residual. It re-weights the optimization objective, preventing large-magnitude errors associated with distant, uncertain waypoints from dominating the learning signal. Extensive experiments validate the effectiveness of our framework. On the NAVSIM benchmark, ResAD achieves a state-of-the-art PDMS of 88.6 using a vanilla diffusion policy with only two denoising steps, demonstrating that our approach significantly simplifies the learning task and improves model performance. The code will be released to facilitate further research.
            </div>
        </details>
    </div>
</div>
<!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08556v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08556v1" target="_blank" rel="noopener noreferrer">
                DexNDM：通过关节级神经动力学模型缩小灵巧手内旋转的现实差距
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DexNDM: Closing the Reality Gap for Dexterous In-Hand Rotation via Joint-Wise Neural Dynamics Model
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xueyi Liu, He Wang, Li Yi
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器人领域的灵巧手操作和物理模拟，属于机器人控制与动力学建模范畴。虽然涉及神经网络模型，但其应用场景（手内物体旋转、现实差距）与推荐系统、搜索或广告领域没有任何直接或潜在的关联，完全超出了关注范围。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:59:11">2025-10-09 17:59:11</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08556v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08556v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Achieving generalized in-hand object rotation remains a significant challenge in robotics, largely due to the difficulty of transferring policies from simulation to the real world. The complex, contact-rich dynamics of dexterous manipulation create a "reality gap" that has limited prior work to constrained scenarios involving simple geometries, limited object sizes and aspect ratios, constrained wrist poses, or customized hands. We address this sim-to-real challenge with a novel framework that enables a single policy, trained in simulation, to generalize to a wide variety of objects and conditions in the real world. The core of our method is a joint-wise dynamics model that learns to bridge the reality gap by effectively fitting limited amount of real-world collected data and then adapting the sim policy's actions accordingly. The model is highly data-efficient and generalizable across different whole-hand interaction distributions by factorizing dynamics across joints, compressing system-wide influences into low-dimensional variables, and learning each joint's evolution from its own dynamic profile, implicitly capturing these net effects. We pair this with a fully autonomous data collection strategy that gathers diverse, real-world interaction data with minimal human intervention. Our complete pipeline demonstrates unprecedented generality: a single policy successfully rotates challenging objects with complex shapes (e.g., animals), high aspect ratios (up to 5.33), and small sizes, all while handling diverse wrist orientations and rotation axes. Comprehensive real-world evaluations and a teleoperation application for complex tasks validate the effectiveness and robustness of our approach. Website: https://meowuu7.github.io/DexNDM/
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08553v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08553v1" target="_blank" rel="noopener noreferrer">
                梦想回忆：基于想象引导的经验检索用于记忆持久性视觉语言导航
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Dream to Recall: Imagination-Guided Experience Retrieval for Memory-Persistent Vision-and-Language Navigation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yunzhe Xu, Yiyuan Pan, Zhe Liu
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言导航(VLN)领域，这是一个纯粹的机器人导航任务，与推荐系统、搜索或广告无关。虽然提到了检索机制，但这是针对导航环境中的经验记忆，而非用户行为或内容检索。该工作属于纯粹的视觉语言多模态研究，没有展示在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:58:01">2025-10-09 17:58:01</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08553v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08553v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-and-Language Navigation (VLN) requires agents to follow natural language instructions through environments, with memory-persistent variants demanding progressive improvement through accumulated experience. Existing approaches for memory-persistent VLN face critical limitations: they lack effective memory access mechanisms, instead relying on entire memory incorporation or fixed-horizon lookup, and predominantly store only environmental observations while neglecting navigation behavioral patterns that encode valuable decision-making strategies. We present Memoir, which employs imagination as a retrieval mechanism grounded by explicit memory: a world model imagines future navigation states as queries to selectively retrieve relevant environmental observations and behavioral histories. The approach comprises: 1) a language-conditioned world model that imagines future states serving dual purposes: encoding experiences for storage and generating retrieval queries; 2) Hybrid Viewpoint-Level Memory that anchors both observations and behavioral patterns to viewpoints, enabling hybrid retrieval; and 3) an experience-augmented navigation model that integrates retrieved knowledge through specialized encoders. Extensive evaluation across diverse memory-persistent VLN benchmarks with 10 distinctive testing scenarios demonstrates Memoir's effectiveness: significant improvements across all scenarios, with 5.4% SPL gains on IR2R over the best memory-persistent baseline, accompanied by 8.3x training speedup and 74% inference memory reduction. The results validate that predictive retrieval of both environmental and behavioral memories enables more effective navigation, with analysis indicating substantial headroom (73.3% vs 93.4% upper bound) for this imagination-guided paradigm. Code at https://github.com/xyz9911/Memoir.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08527v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08527v1" target="_blank" rel="noopener noreferrer">
                FlexTraj：基于灵活点轨迹控制的图像到视频生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhiyuan Zhang, Can Wang, Dongdong Chen, Jing Liao
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像到视频生成技术，属于纯粹的视觉内容生成领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然视频生成技术可能有潜在的广告创意应用，但这属于被明确排除的'非排序广告主题'和'纯视觉论文'范畴。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:50:22">2025-10-09 17:50:22</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08527v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08527v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present FlexTraj, a framework for image-to-video generation with flexible point trajectory control. FlexTraj introduces a unified point-based motion representation that encodes each point with a segmentation ID, a temporally consistent trajectory ID, and an optional color channel for appearance cues, enabling both dense and sparse trajectory control. Instead of injecting trajectory conditions into the video generator through token concatenation or ControlNet, FlexTraj employs an efficient sequence-concatenation scheme that achieves faster convergence, stronger controllability, and more efficient inference, while maintaining robustness under unaligned conditions. To train such a unified point trajectory-controlled video generator, FlexTraj adopts an annealing training strategy that gradually reduces reliance on complete supervision and aligned condition. Experimental results demonstrate that FlexTraj enables multi-granularity, alignment-agnostic trajectory control for video generation, supporting various applications such as motion cloning, drag-based image-to-video, motion interpolation, camera redirection, flexible action control and mesh animations.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08508v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
  The abstract's raw LaTeX \underline{...} markup, which browsers print literally,
  is rendered with <u> so the acronym letters stay underlined as the authors intended.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08508v1" target="_blank" rel="noopener noreferrer">
                MoA-VR：面向一体化视频修复的智能体混合系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MoA-VR: A Mixture-of-Agents System Towards All-in-One Video Restoration
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lu Liu, Chunlei Cai, Shaocheng Shen, Jianfeng Liang, Weimin Ouyang, Tianxiao Ye,...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频修复的计算机视觉任务，属于纯粹的视觉处理领域。虽然Mixture-of-Agents架构在概念上相关，但该工作没有展示与推荐系统、搜索或广告的潜在应用连接，完全属于被排除的视觉技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:42:51">2025-10-09 17:42:51</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08508v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08508v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Real-world videos often suffer from complex degradations, such as noise, compression artifacts, and low-light distortions, due to diverse acquisition and transmission conditions. Existing restoration methods typically require professional manual selection of specialized models or rely on monolithic architectures that fail to generalize across varying degradations. Inspired by expert experience, we propose MoA-VR, the first <u>M</u>ixture-<u>o</u>f-<u>A</u>gents <u>V</u>ideo <u>R</u>estoration system that mimics the reasoning and processing procedures of human professionals through three coordinated agents: Degradation Identification, Routing and Restoration, and Restoration Quality Assessment. Specifically, we construct a large-scale and high-resolution video degradation recognition benchmark and build a vision-language model (VLM) driven degradation identifier. We further introduce a self-adaptive router powered by large language models (LLMs), which autonomously learns effective restoration strategies by observing tool usage patterns. To assess intermediate and final processed video quality, we construct the <u>Res</u>tored <u>V</u>ideo <u>Q</u>uality (Res-VQ) dataset and design a dedicated VLM-based video quality assessment (VQA) model tailored for restoration tasks. Extensive experiments demonstrate that MoA-VR effectively handles diverse and compound degradations, consistently outperforming existing baselines in terms of both objective metrics and perceptual quality. These results highlight the potential of integrating multimodal intelligence and modular reasoning in general-purpose video restoration systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08498v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08498v1" target="_blank" rel="noopener noreferrer">
                基于人工智能的创伤性脑损伤放射学报告生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            AI-Driven Radiology Report Generation for Traumatic Brain Injuries
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Riadh Bouslimi, Houda Trabelsi, Wahiba Ben Abdssalem Karaa, Hana Hedhli
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的放射学报告生成，属于医疗AI应用范畴，与推荐系统、搜索或广告领域完全无关。论文内容涉及创伤性脑损伤的医学诊断报告生成，属于明确的医疗领域特定应用，完全超出您关注的技术领域范围。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:39:04">2025-10-09 17:39:04</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08498v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08498v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">eess.IV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">68T07</span><span class="category-tag">68U10</span><span class="category-tag">I.2.10; I.2.7; I.4.5</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Traumatic brain injuries present significant diagnostic challenges in emergency medicine, where the timely interpretation of medical images is crucial for patient outcomes. In this paper, we propose a novel AI-based approach for automatic radiology report generation tailored to cranial trauma cases. Our model integrates an AC-BiFPN with a Transformer architecture to capture and process complex medical imaging data such as CT and MRI scans. The AC-BiFPN extracts multi-scale features, enabling the detection of intricate anomalies like intracranial hemorrhages, while the Transformer generates coherent, contextually relevant diagnostic reports by modeling long-range dependencies. We evaluate the performance of our model on the RSNA Intracranial Hemorrhage Detection dataset, where it outperforms traditional CNN-based models in both diagnostic accuracy and report generation. This solution not only supports radiologists in high-pressure environments but also provides a powerful educational tool for trainee physicians, offering real-time feedback and enhancing their learning experience. Our findings demonstrate the potential of combining advanced feature extraction with transformer-based text generation to improve clinical decision-making in the diagnosis of traumatic brain injuries.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08491v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
  The abstract's raw LaTeX math ($10\times$, $6\times$) is written with the literal
  multiplication sign so it reads correctly without a math renderer.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08491v1" target="_blank" rel="noopener noreferrer">
                Splat the Net：基于可溅射神经基元的辐射场
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Splat the Net: Radiance Fields with Splattable Neural Primitives
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xilong Zhou, Bao-Huy Nguyen, Loïc Magne, Vladislav Golyanik, Thomas Leimkühler, ...
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题涉及计算机视觉中的辐射场和神经基元技术，属于3D视觉和图形学领域。这些技术与推荐系统、搜索或广告的核心技术栈没有直接关联，也不具备在相关领域应用的明显潜力。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:31:11">2025-10-09 17:31:11</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08491v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08491v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Radiance fields have emerged as a predominant representation for modeling 3D scene appearance. Neural formulations such as Neural Radiance Fields provide high expressivity but require costly ray marching for rendering, whereas primitive-based methods such as 3D Gaussian Splatting offer real-time efficiency through splatting, yet at the expense of representational power. Inspired by advances in both these directions, we introduce splattable neural primitives, a new volumetric representation that reconciles the expressivity of neural models with the efficiency of primitive-based splatting. Each primitive encodes a bounded neural density field parameterized by a shallow neural network. Our formulation admits an exact analytical solution for line integrals, enabling efficient computation of perspectively accurate splatting kernels. As a result, our representation supports integration along view rays without the need for costly ray marching. The primitives flexibly adapt to scene geometry and, being larger than prior analytic primitives, reduce the number required per scene. On novel-view synthesis benchmarks, our approach matches the quality and speed of 3D Gaussian Splatting while using 10× fewer primitives and 6× fewer parameters. These advantages arise directly from the representation itself, without reliance on complex control or adaptation frameworks. The project page is https://vcai.mpi-inf.mpg.de/projects/SplatNet/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!--
  Paper card for arXiv:2510.08475v1 (score 1/10). It carries `collapsed-level-2`,
  which the page's inline styles set to display: none !important.
  Decorative Font Awesome icons are marked aria-hidden, English passages are tagged
  lang="en" because the document language is zh-CN, and the timestamp uses <time>.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08475v1" target="_blank" rel="noopener noreferrer">
                DexMan：从人类和生成视频中学习双手灵巧操作
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original (English) title -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DexMan: Learning Bimanual Dexterous Manipulation from Human and Generated Videos
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jhen Hsieh, Kuan-Hsun Tu, Kuo-Han Hung, Tsung-Wei Ke
        </div>

        <!-- Recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器人双手灵巧操作学习，属于纯粹的机器人控制领域。虽然涉及从视频中学习，但这是针对物理机器人动作模仿，与推荐系统、搜索或广告中的序列建模、多模态理解等核心技术没有直接关联。该技术缺乏在RecSys/Search/Ads领域的潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp | arXiv link | categories -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T17:17:05">2025-10-09 17:17:05</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.08475v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08475v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Collapsible full abstract (native disclosure widget) -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present DexMan, an automated framework that converts human visual demonstrations into bimanual dexterous manipulation skills for humanoid robots in simulation. Operating directly on third-person videos of humans manipulating rigid objects, DexMan eliminates the need for camera calibration, depth sensors, scanned 3D object assets, or ground-truth hand and object motion annotations. Unlike prior approaches that consider only simplified floating hands, it directly controls a humanoid robot and leverages novel contact-based rewards to improve policy learning from noisy hand-object poses estimated from in-the-wild videos. DexMan achieves state-of-the-art performance in object pose estimation on the TACO benchmark, with absolute gains of 0.08 and 0.12 in ADD-S and VSD. Meanwhile, its reinforcement learning policy surpasses previous methods by 19% in success rate on OakInk-v2. Furthermore, DexMan can generate skills from both real and synthetic videos, without the need for manual data collection and costly motion capture, and enabling the creation of large-scale, diverse datasets for training generalist dexterous manipulation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08407v1" target="_blank" rel="noopener noreferrer">
                基于生物学驱动的深度学习牙本质孔隙网络超分辨率成像评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Biology-driven assessment of deep learning super-resolution imaging of the porosity network in dentin
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lauren Anderson, Lucas Chatelain, Nicolas Tremblay, Kathryn Grandfield, David Ro...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于牙本质孔隙网络的医学成像应用，属于明确的生物学/医学领域，与推荐系统、搜索或广告技术完全无关。深度学习在这里仅作为医学图像处理的工具，没有任何潜在的应用于RecSys/Search/Ads的可能性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T16:26:38">2025-10-09 16:26:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08407v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08407v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span><span class="category-tag">q-bio.TO</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The mechanosensory system of teeth is currently believed to partly rely on Odontoblast cells stimulation by fluid flow through a porosity network extending through dentin. Visualizing the smallest sub-microscopic porosity vessels therefore requires the highest achievable resolution from confocal fluorescence microscopy, the current gold standard. This considerably limits the extent of the field of view to very small sample regions. To overcome this limitation, we tested different deep learning (DL) super-resolution (SR) models to allow faster experimental acquisitions of lower resolution images and restore optimal image quality by post-processing. Three supervised 2D SR models (RCAN, pix2pix, FSRCNN) and one unsupervised (CycleGAN) were applied to a unique set of experimentally paired high- and low-resolution confocal images acquired with different sampling schemes, resulting in a pixel size increase of x2, x4, x8. Model performance was quantified using a broad set of similarity and distribution-based image quality assessment (IQA) metrics, which yielded inconsistent results that mostly contradicted our visual perception. This raises the question of the relevance of such generic metrics to efficiently target the specific structure of dental porosity. To resolve this conflicting information, the generated SR images were segmented taking into account the specific scales and morphology of the porosity network and analysed by comparing connected components. Additionally, the capacity of the SR models to preserve 3D porosity connectivity throughout the confocal image stacks was evaluated using graph analysis. This biology-driven assessment allowed a far better mechanistic interpretation of SR performance, highlighting differences in model sensitivity to weak intensity features and the impact of non-linearity in image generation, which explains the failure of standard IQA metrics.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08393v1" target="_blank" rel="noopener noreferrer">
                基于课程学习的医学图像分割鲁棒无源域自适应
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Robust Source-Free Domain Adaptation for Medical Image Segmentation based on Curriculum Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ziqi Zhang, Yuexiang Li, Yawen Huang, Nanjun He, Tao Xu, Liwei Lin, Yefeng Zheng...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像分割的领域自适应问题，这属于医学/生物领域的特定应用，与推荐系统、搜索或广告的核心技术无关。论文内容涉及课程学习和无源域自适应，但这些技术在该文中被应用于医学图像处理，没有显示出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T16:15:10">2025-10-09 16:15:10</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08393v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08393v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent studies have uncovered a new research line, namely source-free domain adaptation, which adapts a model to target domains without using the source data. Such a setting can address the concerns on data privacy and security issues of medical images. However, current source-free domain adaptation frameworks mainly focus on the pseudo label refinement for target data without the consideration of learning procedure. Indeed, a progressive learning process from source to target domain will benefit the knowledge transfer during model adaptation. To this end, we propose a curriculum-based framework, namely learning from curriculum (LFC), for source-free domain adaptation, which consists of easy-to-hard and source-to-target curricula. Concretely, the former curriculum enables the framework to start learning with `easy' samples and gradually tune the optimization direction of model adaption by increasing the sample difficulty. While, the latter can stablize the adaptation process, which ensures smooth transfer of the model from the source domain to the target. We evaluate the proposed source-free domain adaptation approach on the public cross-domain datasets for fundus segmentation and polyp segmentation. The extensive experimental results show that our framework surpasses the existing approaches and achieves a new state-of-the-art.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08358v1" target="_blank" rel="noopener noreferrer">
                SPICE：简单实用的图像清晰化与增强
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SPICE: Simple and Practical Image Clarification and Enhancement
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Alexander Belyaev, Pierre-Alain Fayolle, Michael Cohen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像处理技术，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告的核心技术栈没有直接关联。虽然图像增强技术可能在某些特定场景下作为预处理步骤，但论文本身并未表明与异构数据建模、Transformer架构或LLM应用有任何联系，因此相关性极低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T15:43:07">2025-10-09 15:43:07</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08358v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08358v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce a simple and efficient method to enhance and clarify images. More specifically, we deal with low light image enhancement and clarification of hazy imagery (hazy/foggy images, images containing sand dust, and underwater images). Our method involves constructing an image filter to simulate low-light or hazy conditions and deriving approximate reverse filters to minimize distortions in the enhanced images. Experimental results show that our approach is highly competitive and often surpasses state-of-the-art techniques in handling extremely dark images and in enhancing hazy images. A key advantage of our approach lies in its simplicity: Our method is implementable with just a few lines of MATLAB code.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08318v1" target="_blank" rel="noopener noreferrer">
                LinVideo：一种实现高效视频生成中O(n)注意力的训练后框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yushi Huang, Xingtong Ge, Ruihao Gong, Chengtao Lv, Jun Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成领域的注意力效率优化，属于纯粹的视觉内容生成范畴。虽然涉及注意力机制效率改进，但其应用场景仅限于视频生成，与推荐系统、搜索或广告的排名和建模需求没有直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T15:03:39">2025-10-09 15:03:39</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08318v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08318v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video diffusion models (DMs) have enabled high-quality video synthesis. However, their computation costs scale quadratically with sequence length because self-attention has quadratic complexity. While linear attention lowers the cost, fully replacing quadratic attention requires expensive pretraining due to the limited expressiveness of linear attention and the complexity of spatiotemporal modeling in video generation. In this paper, we present LinVideo, an efficient data-free post-training framework that replaces a target number of self-attention modules with linear attention while preserving the original model's performance. First, we observe a significant disparity in the replaceability of different layers. Instead of manual or heuristic choices, we frame layer selection as a binary classification problem and propose selective transfer, which automatically and progressively converts layers to linear attention with minimal performance impact. Additionally, to overcome the ineffectiveness and inefficiency of existing objectives for this transfer process, we introduce an anytime distribution matching (ADM) objective that aligns the distributions of samples across any timestep along the sampling trajectory. This objective is efficient and recovers model performance. Extensive experiments show that our method achieves a 1.25-2.00x speedup while preserving generation quality, and our 4-step distilled model further delivers a 15.92x latency reduction with minimal visual quality drop.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08273v1" target="_blank" rel="noopener noreferrer">
                一石二鸟：面向文本引导图像修复的空文本-空频率感知扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            One Stone with Two Birds: A Null-Text-Null Frequency-Aware Diffusion Models for Text-Guided Image Inpainting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haipeng Liu, Yang Wang, Meng Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本引导的图像修复和扩散模型，这属于纯粹的视觉内容生成领域。虽然扩散模型是LLM相关技术，但该工作专注于图像修复的具体应用，与推荐系统、搜索或广告中的排序、检索或用户建模没有直接关联。该技术没有明显的潜力应用于RecSys/Search/Ads领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T14:30:34">2025-10-09 14:30:34</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08273v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08273v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The source abstract's LaTeX \textbf{...} markup is rendered as <b> instead of being shown verbatim. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-guided image inpainting aims at reconstructing the masked regions as per text prompts, where the longstanding challenges lie in the preservation for unmasked regions, while achieving the semantics consistency between unmasked and inpainted masked regions. Previous arts failed to address both of them, always with either of them to be remedied. Such facts, as we observed, stem from the entanglement of the hybrid (e.g., mid-and-low) frequency bands that encode varied image properties, which exhibit different robustness to text prompts during the denoising process. In this paper, we propose a null-text-null frequency-aware diffusion models, dubbed <b>NTN-Diff</b>, for text-guided image inpainting, by decomposing the semantics consistency across masked and unmasked regions into the consistencies as per each frequency band, while preserving the unmasked regions, to circumvent two challenges in a row. Based on the diffusion process, we further divide the denoising process into early (high-level noise) and late (low-level noise) stages, where the mid-and-low frequency bands are disentangled during the denoising process. As observed, the stable mid-frequency band is progressively denoised to be semantically aligned during text-guided denoising process, which, meanwhile, serves as the guidance to the null-text denoising process to denoise low-frequency band for the masked regions, followed by a subsequent text-guided denoising process at late stage, to achieve the semantics consistency for mid-and-low frequency bands across masked and unmasked regions, while preserve the unmasked regions. Extensive experiments validate the superiority of NTN-Diff over the state-of-the-art diffusion models to text-guided diffusion models. Our code can be accessed from https://github.com/htyjers/NTN-Diff.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08271v1" target="_blank" rel="noopener noreferrer">
                SViM3D：基于稳定视频材料扩散的单图像3D生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SViM3D: Stable Video Material Diffusion for Single Image 3D Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Andreas Engelhardt, Mark Boss, Vikram Voletti, Chun-Han Yao, Hendrik P. A. Lensc...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D视觉生成领域，涉及单图像到3D的转换和视频材料扩散技术，属于纯粹的计算机视觉研究方向。虽然标题提到扩散模型，但核心应用场景是3D内容生成，与推荐系统、搜索或广告的排名、匹配、用户建模等核心任务没有直接关联。该技术缺乏在RecSys/Search/Ads领域的潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T14:29:47">2025-10-09 14:29:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08271v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08271v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.GR</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present Stable Video Materials 3D (SViM3D), a framework to predict multi-view consistent physically based rendering (PBR) materials, given a single image. Recently, video diffusion models have been successfully used to reconstruct 3D objects from a single image efficiently. However, reflectance is still represented by simple material models or needs to be estimated in additional steps to enable relighting and controlled appearance edits. We extend a latent video diffusion model to output spatially varying PBR parameters and surface normals jointly with each generated view based on explicit camera control. This unique setup allows for relighting and generating a 3D asset using our model as neural prior. We introduce various mechanisms to this pipeline that improve quality in this ill-posed setting. We show state-of-the-art relighting and novel view synthesis performance on multiple object-centric datasets. Our method generalizes to diverse inputs, enabling the generation of relightable 3D assets useful in AR/VR, movies, games and other visual media.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08269v1" target="_blank" rel="noopener noreferrer">
                遥感图像场景分类中单正例多标签学习的自适应梯度校准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Adaptive Gradient Calibration for Single-Positive Multi-Label Learning in Remote Sensing Image Scene Classification
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chenying Liu, Gianmarco Perantoni, Lorenzo Bruzzone, Xiao Xiang Zhu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于遥感图像场景分类，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告没有明显关联。论文讨论的单正例多标签学习和梯度校准技术是特定于图像分类任务的，无法看出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T14:26:09">2025-10-09 14:26:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08269v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08269v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multi-label classification (MLC) offers a more comprehensive semantic understanding of Remote Sensing (RS) imagery compared to traditional single-label classification (SLC). However, obtaining complete annotations for MLC is particularly challenging due to the complexity and high cost of the labeling process. As a practical alternative, single-positive multi-label learning (SPML) has emerged, where each image is annotated with only one relevant label, and the model is expected to recover the full set of labels. While scalable, SPML introduces significant supervision ambiguity, demanding specialized solutions for model training. Although various SPML methods have been proposed in the computer vision domain, research in the RS context remains limited. To bridge this gap, we propose Adaptive Gradient Calibration (AdaGC), a novel and generalizable SPML framework tailored to RS imagery. AdaGC adopts a gradient calibration (GC) mechanism combined with Mixup and a dual exponential moving average (EMA) module for robust pseudo-label generation. To maximize AdaGC's effectiveness, we introduce a simple yet theoretically grounded indicator to adaptively trigger GC after an initial warm-up stage based on training dynamics, thereby guaranteeing the effectiveness of GC in mitigating overfitting to label noise. Extensive experiments on two benchmark RS datasets under two distinct label noise types demonstrate that AdaGC achieves state-of-the-art (SOTA) performance while maintaining strong robustness across diverse settings.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<!-- Paper card. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline stylesheet). -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08260v1" target="_blank" rel="noopener noreferrer">
                基于细粒度文本驱动的双人动作生成：通过动态分层交互实现
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Fine-grained text-driven dual-human motion generation via dynamic hierarchical interaction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mu Li, Yin Wang, Zhiying Leng, Jiapeng Liu, Frederick W. B. Li, Xiaohui Liang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本驱动的双人动作生成，属于计算机图形学或动画领域，与推荐系统、搜索或广告的核心技术无关。虽然涉及文本到动作的生成，但这本质上是内容生成任务，属于被明确排除的AIGC和纯视觉应用范畴，没有明显的推荐、搜索或广告应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative, hence aria-hidden. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime has no UTC offset because the timezone is not stated in the page. -->
            <time datetime="2025-10-09T14:18:53">2025-10-09 14:18:53</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08260v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08260v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Human interaction is inherently dynamic and hierarchical, where the dynamic refers to the motion changes with distance, and the hierarchy is from individual to inter-individual and ultimately to overall motion. Exploiting these properties is vital for dual-human motion generation, while existing methods almost model human interaction temporally invariantly, ignoring distance and hierarchy. To address it, we propose a fine-grained dual-human motion generation method, namely FineDual, a tri-stage method to model the dynamic hierarchical interaction from individual to inter-individual. The first stage, Self-Learning Stage, divides the dual-human overall text into individual texts through a Large Language Model, aligning text features and motion features at the individual level. The second stage, Adaptive Adjustment Stage, predicts interaction distance by an interaction distance predictor, modeling human interactions dynamically at the inter-individual level by an interaction-aware graph network. The last stage, Teacher-Guided Refinement Stage, utilizes overall text features as guidance to refine motion features at the overall level, generating fine-grained and high-quality dual-human motion. Extensive quantitative and qualitative evaluations on dual-human motion datasets demonstrate that our proposed FineDual outperforms existing approaches, effectively modeling dynamic hierarchical human interaction.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder (set `customMade` to configure; see https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE). TODO: replace with a real description of this paper-card template.
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08181v1" target="_blank" rel="noopener noreferrer">
                InstructUDrag：基于联合文本指令和对象拖拽的交互式图像编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            InstructUDrag: Joint Text Instructions and Object Dragging for Interactive Image Editing
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haoran Yu, Yi Shi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于交互式图像编辑技术，涉及文本指令和对象拖拽操作，属于纯粹的计算机视觉和图像生成领域。虽然标题提到'文本指令'，但核心是图像编辑而非语言模型在推荐/搜索/广告中的应用，与当前关注的推荐系统、搜索广告、Transformer架构进展或异构数据统一建模等焦点完全无关。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T13:06:49">2025-10-09 13:06:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08181v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08181v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-to-image diffusion models have shown great potential for image editing, with techniques such as text-based and object-dragging methods emerging as key approaches. However, each of these methods has inherent limitations: text-based methods struggle with precise object positioning, while object dragging methods are confined to static relocation. To address these issues, we propose InstructUDrag, a diffusion-based framework that combines text instructions with object dragging, enabling simultaneous object dragging and text-based image editing. Our framework treats object dragging as an image reconstruction process, divided into two synergistic branches. The moving-reconstruction branch utilizes energy-based gradient guidance to move objects accurately, refining cross-attention maps to enhance relocation precision. The text-driven editing branch shares gradient signals with the reconstruction branch, ensuring consistent transformations and allowing fine-grained control over object attributes. We also employ DDPM inversion and inject prior information into noise maps to preserve the structure of moved objects. Extensive experiments demonstrate that InstructUDrag facilitates flexible, high-fidelity image editing, offering both precision in object relocation and semantic control over image content.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08157v1" target="_blank" rel="noopener noreferrer">
                超越文本思维链：基于深度置信推理的交错文本-图像链用于图像编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Textual CoT: Interleaved Text-Image Chains with Deep Confidence Reasoning for Image Editing
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhentao Zou, Zhengrong Yue, Kunpeng Du, Binlei Bao, Hanting Li, Haizhen Xie, Guo...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像编辑任务，涉及视觉模态和内容生成，属于纯粹的视觉应用领域。虽然提到了置信推理，但核心关注点是图像编辑而非推荐系统、搜索或广告中的排序或理解任务。该工作没有明显的潜在应用可以转移到RecSys/Search/Ads领域。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T12:36:51">2025-10-09 12:36:51</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08157v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08157v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Image editing with natural language has gained significant popularity, yet existing methods struggle with intricate object intersections and fine-grained spatial relationships due to the lack of an explicit reasoning process. While Chain-of-Thought (CoT) has been explored to enhance reasoning, purely textual CoT or CoT augmented with coordinate information is fundamentally limited in its ability to represent intricate visual layouts and lacks the necessary visual cues to guide the generation of fine-grained, pixel-level details. To address these challenges, we propose Multimodal Reasoning Edit (MURE), a novel framework that shifts the visual editing process from purely text-based reasoning to a series of interleaved textual and visual rationales. Our framework performs image editing using a natively multimodal, interleaved text-image CoT. This approach generates a step-by-step chain of reasoning where a textual description is followed by a corresponding visual cue, such as a positional mask that defined intended edited regions or a representation of new content. Furthermore, to mitigate the hallucination phenomenon of large language models, we introduce Multimodal Deep Confidence (MMDC) reasoning paradigm. This paradigm explores a tree of visual reasoning paths at each step. By pruning low-quality branches using a deep confidence score from a reward model, it ensures the model consistently follows a high-quality trajectory towards the final edited result. The proposed method decomposes complex editing tasks into interdependent sub-tasks, achieving greater precision at each stage and yielding high-fidelity edited results. We define the formulation for interleaved text-image chains and release the first CoT-Edit-14K dataset, comprising 14K high-quality editing examples. Extensive experiments show that our method yields significant improvements across three image editing benchmarks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08131v1" target="_blank" rel="noopener noreferrer">
                实时运动可控自回归视频扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Real-Time Motion-Controllable Autoregressive Video Diffusion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kesen Zhao, Jiaxin Shi, Beier Zhu, Junbao Zhou, Xiaolong Shen, Yuan Zhou, Qianru...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于视频生成和运动控制，属于纯粹的视觉内容生成领域。虽然提到了自回归建模，但核心关注点是视频生成技术，与推荐系统、搜索或广告的排名和建模需求没有直接关联。该技术缺乏在RecSys/Search/Ads领域的明确应用场景。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T12:17:11">2025-10-09 12:17:11</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08131v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08131v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Real-time motion-controllable video generation remains challenging due to the inherent latency of bidirectional diffusion models and the lack of effective autoregressive (AR) approaches. Existing AR video diffusion models are limited to simple control signals or text-to-video generation, and often suffer from quality degradation and motion artifacts in few-step generation. To address these challenges, we propose AR-Drag, the first RL-enhanced few-step AR video diffusion model for real-time image-to-video generation with diverse motion control. We first fine-tune a base I2V model to support basic motion control, then further improve it via reinforcement learning with a trajectory-based reward model. Our design preserves the Markov property through a Self-Rollout mechanism and accelerates training by selectively introducing stochasticity in denoising steps. Extensive experiments demonstrate that AR-Drag achieves high visual fidelity and precise motion alignment, significantly reducing latency compared with state-of-the-art motion-controllable VDMs, while using only 1.3B parameters. Additional visualizations can be found on our project page: https://kesenzhao.github.io/AR-Drag.github.io/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08116v1" target="_blank" rel="noopener noreferrer">
                用于CT和肝脏肿瘤分割中深度学习鲁棒性的随机窗口增强
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Random Window Augmentations for Deep Learning Robustness in CT and Liver Tumor Segmentation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Eirik A. Østmo, Kristoffer K. Wickstrøm, Keyur Radiya, Michael C. Kampffmeyer, K...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像分割领域，特别是CT扫描和肝脏肿瘤分割，这属于医学/生物学应用范畴。虽然提到了数据增强技术，但该技术完全应用于医疗领域，与推荐系统、搜索或广告没有任何关联。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:57:04">2025-10-09 11:57:04</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08116v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08116v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Contrast-enhanced Computed Tomography (CT) is important for diagnosis and treatment planning for various medical conditions. Deep learning (DL) based segmentation models may enable automated medical image analysis for detecting and delineating tumors in CT images, thereby reducing clinicians' workload. Achieving generalization capabilities in limited data domains, such as radiology, requires modern DL models to be trained with image augmentation. However, naively applying augmentation methods developed for natural images to CT scans often disregards the nature of the CT modality, where the intensities measure Hounsfield Units (HU) and have important physical meaning. This paper challenges the use of such intensity augmentations for CT imaging and shows that they may lead to artifacts and poor generalization. To mitigate this, we propose a CT-specific augmentation technique, called Random windowing, that exploits the available HU distribution of intensities in CT images. Random windowing encourages robustness to contrast-enhancement and significantly increases model performance on challenging images with poor contrast or timing. We perform ablations and analysis of our method on multiple datasets, and compare to, and outperform, state-of-the-art alternatives, while focusing on the challenge of liver tumor segmentation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08096v1" target="_blank" rel="noopener noreferrer">
                基于3D高斯溅射的极端姿态下人脸解析高效标签精炼
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Efficient Label Refinement for Face Parsing Under Extreme Poses Using 3D Gaussian Splatting
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ankit Gahlawat, Anirban Mukherjee, Dinesh Babu Jayagopi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人脸解析和3D高斯溅射技术，属于纯粹的视觉处理范畴。论文内容涉及人脸姿态处理和标签精炼，与推荐系统、搜索或广告的核心技术栈没有直接关联，也无法识别出在推荐/搜索/广告领域的潜在应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:34:55">2025-10-09 11:34:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08096v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08096v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Accurate face parsing under extreme viewing angles remains a significant challenge due to limited labeled data in such poses. Manual annotation is costly and often impractical at scale. We propose a novel label refinement pipeline that leverages 3D Gaussian Splatting (3DGS) to generate accurate segmentation masks from noisy multiview predictions. By jointly fitting two 3DGS models, one to RGB images and one to their initial segmentation maps, our method enforces multiview consistency through shared geometry, enabling the synthesis of pose-diverse training data with only minimal post-processing. Fine-tuning a face parsing model on this refined dataset significantly improves accuracy on challenging head poses, while maintaining strong performance on standard views. Extensive experiments, including human evaluations, demonstrate that our approach achieves superior results compared to state-of-the-art methods, despite requiring no ground-truth 3D annotations and using only a small set of initial images. Our method offers a scalable and effective solution for improving face parsing robustness in real-world settings.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08094v1" target="_blank" rel="noopener noreferrer">
                DarkHash：一种针对深度哈希的无数据后门攻击
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DarkHash: A Data-Free Backdoor Attack Against Deep Hashing
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ziqi Zhou, Menghao Deng, Yufei Song, Hangtao Zhang, Wei Wan, Shengshan Hu, Mingh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于深度哈希系统的后门攻击和安全性问题，这属于网络安全和模型安全领域。虽然深度哈希技术可能在某些推荐系统中用于相似性搜索，但论文的核心焦点是攻击方法和安全漏洞，这明确属于被排除的"安全、隐私"等非技术性话题范畴。该研究没有涉及推荐系统、搜索或广告的核心算法改进或LLM技术应用。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:28:23">2025-10-09 11:28:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08094v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08094v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Benefiting from its superior feature learning capabilities and efficiency, deep hashing has achieved remarkable success in large-scale image retrieval. Recent studies have demonstrated the vulnerability of deep hashing models to backdoor attacks. Although these studies have shown promising attack results, they rely on access to the training dataset to implant the backdoor. In the real world, obtaining such data (e.g., identity information) is often prohibited due to privacy protection and intellectual property concerns. Embedding backdoors into deep hashing models without access to the training data, while maintaining retrieval accuracy for the original task, presents a novel and challenging problem. In this paper, we propose DarkHash, the first data-free backdoor attack against deep hashing. Specifically, we design a novel shadow backdoor attack framework with dual-semantic guidance. It embeds backdoor functionality and maintains original retrieval accuracy by fine-tuning only specific layers of the victim model using a surrogate dataset. We consider leveraging the relationship between individual samples and their neighbors to enhance backdoor attacks during training. By designing a topological alignment loss, we optimize both individual and neighboring poisoned samples toward the target sample, further enhancing the attack capability. Experimental results on four image datasets, five model architectures, and two hashing methods demonstrate the high effectiveness of DarkHash, outperforming existing state-of-the-art backdoor attack methods. Defense experiments show that DarkHash can withstand existing mainstream backdoor defense methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08073v1" target="_blank" rel="noopener noreferrer">
                物理驱动的时空建模用于AI生成视频检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Physics-Driven Spatiotemporal Modeling for AI-Generated Video Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuhai Zhang, ZiHao Lian, Jiahao Yang, Daiyuan Li, Guoxuan Pang, Feng Liu, Bo Ha...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于AI生成视频检测这一特定计算机视觉任务，属于内容检测和鉴伪领域。虽然涉及AI生成内容，但这是纯粹的视觉应用，与推荐系统、搜索或广告中的排序、匹配、用户建模等核心问题没有直接关联。论文的物理驱动建模方法也没有显示出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T11:00:35">2025-10-09 11:00:35</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08073v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08073v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                AI-generated videos have achieved near-perfect visual realism (e.g., Sora), urgently necessitating reliable detection mechanisms. However, detecting such videos faces significant challenges in modeling high-dimensional spatiotemporal dynamics and identifying subtle anomalies that violate physical laws. In this paper, we propose a physics-driven AI-generated video detection paradigm based on probability flow conservation principles. Specifically, we propose a statistic called Normalized Spatiotemporal Gradient (NSG), which quantifies the ratio of spatial probability gradients to temporal density changes, explicitly capturing deviations from natural video dynamics. Leveraging pre-trained diffusion models, we develop an NSG estimator through spatial gradients approximation and motion-aware temporal modeling without complex motion decomposition while preserving physical constraints. Building on this, we propose an NSG-based video detection method (NSG-VD) that computes the Maximum Mean Discrepancy (MMD) between NSG features of the test and real videos as a detection metric. Last, we derive an upper bound of NSG feature distances between real and generated videos, proving that generated videos exhibit amplified discrepancies due to distributional shifts. Extensive experiments confirm that NSG-VD outperforms state-of-the-art baselines by 16.00% in Recall and 10.75% in F1-Score, validating the superior performance of NSG-VD. The source code is available at https://github.com/ZSHsh98/NSG-VD.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card; the collapsed-level-2 class hides it via the page stylesheet (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08067v1" target="_blank" rel="noopener noreferrer">
                迈向真实世界深度伪造检测：一个多样化的野外伪造人脸数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Towards Real-World Deepfake Detection: A Diverse In-the-wild Dataset of Forgery Faces
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Junyu Shi, Minghui Li, Junguo Zuo, Zhifei Yu, Yipeng Lin, Shengshan Hu, Ziqi Zho...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于深度伪造检测和计算机视觉安全领域，与推荐系统、搜索或广告的核心技术无关。虽然深度伪造检测在内容审核中有应用，但这属于安全/信任范畴，属于明确排除的非技术性话题。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, categories. Icons and separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-09T10:54:38">2025-10-09 10:54:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08067v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08067v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- Abstract text: missing spaces after sentence-ending periods were restored. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Deepfakes, leveraging advanced AIGC (Artificial Intelligence-Generated Content) techniques, create hyper-realistic synthetic images and videos of human faces, posing a significant threat to the authenticity of social media. While this real-world threat is increasingly prevalent, existing academic evaluations and benchmarks for detecting deepfake forgery often fall short to achieve effective application for their lack of specificity, limited deepfake diversity, restricted manipulation techniques. To address these limitations, we introduce RedFace (Real-world-oriented Deepfake Face), a specialized facial deepfake dataset, comprising over 60,000 forged images and 1,000 manipulated videos derived from authentic facial features, to bridge the gap between academic evaluations and real-world necessity. Unlike prior benchmarks, which typically rely on academic methods to generate deepfakes, RedFace utilizes 9 commercial online platforms to integrate the latest deepfake technologies found "in the wild", effectively simulating real-world black-box scenarios. Moreover, RedFace's deepfakes are synthesized using bespoke algorithms, allowing it to capture diverse and evolving methods used by real-world deepfake creators. Extensive experimental results on RedFace (including cross-domain, intra-domain, and real-world social network dissemination simulations) verify the limited practicality of existing deepfake detection schemes against real-world applications. We further perform a detailed analysis of the RedFace dataset, elucidating the reason of its impact on detection performance compared to conventional datasets. Our dataset is available at: https://github.com/kikyou-220/RedFace.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08060v1" target="_blank" rel="noopener noreferrer">
                一种用于多光谱遥感图像分类的类驱动分层残差网络
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            A class-driven hierarchical ResNet for classification of multispectral remote sensing images
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Giulio Weikmann, Gianmarco Perantoni, Lorenzo Bruzzone
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于多光谱遥感图像分类，属于纯粹的计算机视觉应用领域，与推荐系统、搜索或广告没有任何直接关联。论文提出的类驱动分层残差网络架构是特定于遥感图像处理的解决方案，没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T10:47:52">2025-10-09 10:47:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08060v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08060v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This work presents a multitemporal class-driven hierarchical Residual Neural Network (ResNet) designed for modelling the classification of Time Series (TS) of multispectral images at different semantical class levels. The architecture consists of a modification of the ResNet where we introduce additional branches to perform the classification at the different hierarchy levels and leverage on hierarchy-penalty maps to discourage incoherent hierarchical transitions within the classification. In this way, we improve the discrimination capabilities of classes at different levels of semantic details and train a modular architecture that can be used as a backbone network for introducing new specific classes and additional tasks considering limited training samples available. We exploit the class-hierarchy labels to train efficiently the different layers of the architecture, allowing the first layers to train faster on the first levels of the hierarchy modeling general classes (i.e., the macro-classes) and the intermediate classes, while using the last ones to discriminate more specific classes (i.e., the micro-classes). In this way, the targets are constrained in following the hierarchy defined, improving the classification of classes at the most detailed level. The proposed modular network has intrinsic adaptation capability that can be obtained through fine tuning. The experimental results, obtained on two tiles of the Amazonian Forest on 12 monthly composites of Sentinel 2 images acquired during 2019, demonstrate the effectiveness of the hierarchical approach in both generalizing over different hierarchical levels and learning discriminant features for an accurate classification at the micro-class level on a new target area, with a better representation of the minoritarian classes.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08054v1" target="_blank" rel="noopener noreferrer">
                RetouchLLM：无需训练的白盒图像润色
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            RetouchLLM: Training-free White-box Image Retouching
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Moon Ye-Bin, Roy Miles, Tae-Hyun Oh, Ismail Elezi, Jiankang Deng
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于图像处理领域的白盒图像润色技术，属于纯粹的计算机视觉应用。虽然涉及LLM，但核心是图像编辑任务，与推荐系统、搜索或广告的排名和建模需求没有直接关联。该技术不具备在RecSys/Search/Ads领域的明显应用潜力。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T10:40:49">2025-10-09 10:40:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08054v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08054v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Image retouching not only enhances visual quality but also serves as a means of expressing personal preferences and emotions. However, existing learning-based approaches require large-scale paired data and operate as black boxes, making the retouching process opaque and limiting their adaptability to handle diverse, user- or image-specific adjustments. In this work, we propose RetouchLLM, a training-free white-box image retouching system, which requires no training data and performs interpretable, code-based retouching directly on high-resolution images. Our framework progressively enhances the image in a manner similar to how humans perform multi-step retouching, allowing exploration of diverse adjustment paths. It comprises of two main modules: a visual critic that identifies differences between the input and reference images, and a code generator that produces executable codes. Experiments demonstrate that our approach generalizes well across diverse retouching styles, while natural language-based user interaction enables interpretable and controllable adjustments tailored to user intent.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.08052v1" target="_blank" rel="noopener noreferrer">
                RASALoRE：基于区域感知空间注意力与位置随机嵌入的脑部MRI扫描弱监督异常检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            RASALoRE: Region Aware Spatial Attention with Location-based Random Embeddings for Weakly Supervised Anomaly Detection in Brain MRI Scans
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Bheeshm Sharma, Karthikeyan Jaganathan, Balamurugan Palaniappan
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像（脑部MRI）的异常检测，属于医疗领域的特定应用。虽然提到了注意力机制，但其核心应用场景（脑部MRI）与推荐系统、搜索或广告领域完全无关。该技术没有明显的潜力应用于RecSys/Search/Ads领域。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T10:37:47">2025-10-09 10:37:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.08052v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.08052v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Weakly Supervised Anomaly detection (WSAD) in brain MRI scans is an important challenge useful to obtain quick and accurate detection of brain anomalies when precise pixel-level anomaly annotations are unavailable and only weak labels (e.g., slice-level) are available. In this work, we propose RASALoRE: Region Aware Spatial Attention with Location-based Random Embeddings, a novel two-stage WSAD framework. In the first stage, we introduce a Discriminative Dual Prompt Tuning (DDPT) mechanism that generates high-quality pseudo weak masks based on slice-level labels, serving as coarse localization cues. In the second stage, we propose a segmentation network with a region-aware spatial attention mechanism that relies on fixed location-based random embeddings. This design enables the model to effectively focus on anomalous regions. Our approach achieves state-of-the-art anomaly detection performance, significantly outperforming existing WSAD methods while utilizing less than 8 million parameters. Extensive evaluations on the BraTS20, BraTS21, BraTS23, and MSD datasets demonstrate a substantial performance improvement coupled with a significant reduction in computational complexity. Code is available at: https://github.com/BheeshmSharma/RASALoRE-BMVC-2025/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07990v1" target="_blank" rel="noopener noreferrer">
                GraphEnet：基于图神经网络的事件驱动人体姿态估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            GraphEnet: Event-driven Human Pose Estimation with a Graph Neural Network
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Gaurvi Goyal, Pham Cong Thuong, Arren Glover, Masayoshi Mizuno, Chiara Bartolozz...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人体姿态估计，使用图神经网络处理事件驱动数据。虽然涉及图神经网络技术，但该工作纯粹针对视觉任务，没有展示在推荐系统、搜索或广告领域的潜在应用。人体姿态估计与文本/序列建模、用户行为分析或内容排名等核心关注领域相距甚远。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T09:24:48">2025-10-09 09:24:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07990v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07990v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Human Pose Estimation is a crucial module in human-machine interaction applications and, especially since the rise in deep learning technology, robust methods are available to consumers using RGB cameras and commercial GPUs. On the other hand, event-based cameras have gained popularity in the vision research community for their low latency and low energy advantages that make them ideal for applications where those resources are constrained like portable electronics and mobile robots. In this work we propose a Graph Neural Network, GraphEnet, that leverages the sparse nature of event camera output, with an intermediate line based event representation, to estimate 2D Human Pose of a single person at a high frequency. The architecture incorporates a novel offset vector learning paradigm with confidence based pooling to estimate the human pose. This is the first work that applies Graph Neural Networks to event data for Human Pose Estimation. The code is open-source at https://github.com/event-driven-robotics/GraphEnet-NeVi-ICCV2025.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07976v1" target="_blank" rel="noopener noreferrer">
                摘要和对象标签对图像隐私分类的影响
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            The impact of abstract and object tags on image privacy classification
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Darya Baranouskaya, Andrea Cavallaro
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于图像隐私分类，属于计算机视觉和隐私保护领域，与推荐系统、搜索或广告的核心技术无关。虽然涉及标签分类，但主要关注隐私而非推荐或搜索场景中的内容理解与匹配，因此不符合当前关注的技术方向。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T09:09:02">2025-10-09 09:09:02</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07976v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07976v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Object tags denote concrete entities and are central to many computer vision tasks, whereas abstract tags capture higher-level information, which is relevant for tasks that require a contextual, potentially subjective scene understanding. Object and abstract tags extracted from images also facilitate interpretability. In this paper, we explore which type of tags is more suitable for the context-dependent and inherently subjective task of image privacy. While object tags are generally used for privacy classification, we show that abstract tags are more effective when the tag budget is limited. Conversely, when a larger number of tags per image is available, object-related information is as useful. We believe that these findings will guide future research in developing more accurate image privacy classifiers, informed by the role of tag types and quantity.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07961v1" target="_blank" rel="noopener noreferrer">
                潜在和谐：通过潜在空间正则化与可控精细化实现协同统一超高清图像修复
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Latent Harmony: Synergistic Unified UHD Image Restoration via Latent Space Regularization and Controllable Refinement
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yidi Liu, Xueyang Fu, Jie Huang, Jie Xiao, Dong Li, Wenlong Zhang, Lei Bai, Zhen...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的超高清图像修复技术，涉及潜在空间正则化和可控精细化方法。虽然技术上有一定先进性，但论文内容纯粹针对图像处理任务，与推荐系统、搜索或广告的核心技术领域没有任何直接或间接的应用关联。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T08:54:26">2025-10-09 08:54:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07961v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07961v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- Abstract text repaired: spaces restored after sentence-ending periods and the LaTeX residue for alpha replaced by the literal character. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Ultra-High Definition (UHD) image restoration faces a trade-off between computational efficiency and high-frequency detail retention. While Variational Autoencoders (VAEs) improve efficiency via latent-space processing, their Gaussian constraint often discards degradation-specific high-frequency information, hurting reconstruction fidelity. To overcome this, we propose Latent Harmony, a two-stage framework that redefines VAEs for UHD restoration by jointly regularizing the latent space and enforcing high-frequency-aware reconstruction. In Stage One, we introduce LH-VAE, which enhances semantic robustness through visual semantic constraints and progressive degradation perturbations, while latent equivariance strengthens high-frequency reconstruction. Stage Two jointly trains this refined VAE with a restoration model using High-Frequency Low-Rank Adaptation (HF-LoRA): an encoder LoRA guided by a fidelity-oriented high-frequency alignment loss to recover authentic details, and a decoder LoRA driven by a perception-oriented loss to synthesize realistic textures. Both LoRA modules are trained via alternating optimization with selective gradient propagation to preserve the pretrained latent structure. At inference, a tunable parameter α enables flexible fidelity-perception trade-offs. Experiments show Latent Harmony achieves state-of-the-art performance across UHD and standard-resolution tasks, effectively balancing efficiency, perceptual quality, and reconstruction accuracy.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07953v1" target="_blank" rel="noopener noreferrer">
                SimCast：通过短期到长期知识蒸馏增强降水临近预报
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            SimCast: Enhancing Precipitation Nowcasting with Short-to-Long Term Knowledge Distillation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yifang Yin, Shengkai Chen, Yiyao Li, Lu Wang, Ruibing Jin, Wei Cui, Shili Xiang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于气象领域的降水预报，属于纯粹的领域特定应用，与推荐系统、搜索或广告没有任何关联。知识蒸馏技术虽然本身是通用方法，但论文的应用场景和核心问题与我的关注领域完全无关。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T08:49:16">2025-10-09 08:49:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07953v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07953v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Precipitation nowcasting predicts future radar sequences based on current observations, which is a highly challenging task driven by the inherent complexity of the Earth system. Accurate nowcasting is of utmost importance for addressing various societal needs, including disaster management, agriculture, transportation, and energy optimization. As a complementary to existing non-autoregressive nowcasting approaches, we investigate the impact of prediction horizons on nowcasting models and propose SimCast, a novel training pipeline featuring a short-to-long term knowledge distillation technique coupled with a weighted MSE loss to prioritize heavy rainfall regions. Improved nowcasting predictions can be obtained without introducing additional overhead during inference. As SimCast generates deterministic predictions, we further integrate it into a diffusion-based framework named CasCast, leveraging the strengths from probabilistic models to overcome limitations such as blurriness and distribution shift in deterministic outputs. Extensive experimental results on three benchmark datasets validate the effectiveness of the proposed framework, achieving mean CSI scores of 0.452 on SEVIR, 0.474 on HKO-7, and 0.361 on MeteoNet, which outperforms existing approaches by a significant margin.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07951v1" target="_blank" rel="noopener noreferrer">
                用于鲁棒复杂动漫场景文本检测的大规模数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Details: original title, authors, recommendation rationale, metadata row, abstract.
         English text is tagged lang="en" because the document language is zh-CN;
         icon glyphs and "|" separators are decorative and hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Large-scale Dataset for Robust Complex Anime Scene Text Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ziyi Dong, Yurui Zhang, Changmao Li, Naomi Rue Golding, Qing Long
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于动漫场景中的文本检测，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术无直接关联。虽然文本检测在广义上可能与内容理解相关，但该研究的动漫特定性和视觉焦点使其与所列技术领域相距甚远。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- datetime repeats the displayed timestamp; no UTC offset is added because the markup does not state one. -->
            <time datetime="2025-10-09T08:47:52">2025-10-09 08:47:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07951v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07951v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Current text detection datasets primarily target natural or document scenes, where text typically appear in regular font and shapes, monotonous colors, and orderly layouts. The text usually arranged along straight or curved lines. However, these characteristics differ significantly from anime scenes, where text is often diverse in style, irregularly arranged, and easily confused with complex visual elements such as symbols and decorative patterns. Text in anime scene also includes a large number of handwritten and stylized fonts. Motivated by this gap, we introduce AnimeText, a large-scale dataset containing 735K images and 4.2M annotated text blocks. It features hierarchical annotations and hard negative samples tailored for anime scenarios. %Cross-dataset evaluations using state-of-the-art methods demonstrate that models trained on AnimeText achieve superior performance in anime text detection tasks compared to existing datasets. To evaluate the robustness of AnimeText in complex anime scenes, we conducted cross-dataset benchmarking using state-of-the-art text detection methods. Experimental results demonstrate that models trained on AnimeText outperform those trained on existing datasets in anime scene text detection tasks. AnimeText on HuggingFace: https://huggingface.co/datasets/deepghs/AnimeText
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07927v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07927v1" target="_blank" rel="noopener noreferrer">
                ASBench：用于异常检测的图像异常合成基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ASBench: Image Anomalies Synthesis Benchmark for Anomaly Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Qunyi Zhang, Songan Zhang, Jinbao Wang, Xiaoning Lei, Guoyang Xie, Guannan Jiang...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像异常检测基准测试，与推荐系统、搜索或广告的核心技术领域没有直接关联。图像异常检测主要应用于工业质检、医疗影像等视觉领域，无法为RecSys/Search/Ads提供有价值的技术启示或应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 08:23:29">2025-10-09 08:23:29</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07927v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07927v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Anomaly detection plays a pivotal role in manufacturing quality control, yet its application is constrained by limited abnormal samples and high manual annotation costs. While anomaly synthesis offers a promising solution, existing studies predominantly treat anomaly synthesis as an auxiliary component within anomaly detection frameworks, lacking systematic evaluation of anomaly synthesis algorithms. Current research also overlook crucial factors specific to anomaly synthesis, such as decoupling its impact from detection, quantitative analysis of synthetic data and adaptability across different scenarios. To address these limitations, we propose ASBench, the first comprehensive benchmarking framework dedicated to evaluating anomaly synthesis methods. Our framework introduces four critical evaluation dimensions: (i) the generalization performance across different datasets and pipelines (ii) the ratio of synthetic to real data (iii) the correlation between intrinsic metrics of synthesis images and anomaly detection performance metrics , and (iv) strategies for hybrid anomaly synthesis methods. Through extensive experiments, ASBench not only reveals limitations in current anomaly synthesis methods but also provides actionable insights for future research directions in anomaly synthesis
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07910v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07910v1" target="_blank" rel="noopener noreferrer">
                MMM：基于量子化学分子表征学习的组合药物推荐
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MMM: Quantum-Chemical Molecular Representation Learning for Combinatorial Drug Recommendation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chongmyung Kwon, Yujin Kim, Seoeun Park, Yunji Lee, Charmgil Hong
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于药物推荐这一特定医疗领域应用，属于明确的无关主题范畴。虽然标题包含'推荐'一词，但内容涉及量子化学分子表征和组合药物推荐，与搜索、推荐系统或广告的核心技术进展无关，且没有明显的通用推荐系统技术迁移潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 08:03:14">2025-10-09 08:03:14</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07910v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07910v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">I.2.6; I.5.1</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Drug recommendation is an essential task in machine learning-based clinical decision support systems. However, the risk of drug-drug interactions (DDI) between co-prescribed medications remains a significant challenge. Previous studies have used graph neural networks (GNNs) to represent drug structures. Regardless, their simplified discrete forms cannot fully capture the molecular binding affinity and reactivity. Therefore, we propose Multimodal DDI Prediction with Molecular Electron Localization Function (ELF) Maps (MMM), a novel framework that integrates three-dimensional (3D) quantum-chemical information into drug representation learning. It generates 3D electron density maps using the ELF. To capture both therapeutic relevance and interaction risks, MMM combines ELF-derived features that encode global electronic properties with a bipartite graph encoder that models local substructure interactions. This design enables learning complementary characteristics of drug molecules. We evaluate MMM in the MIMIC-III dataset (250 drugs, 442 substructures), comparing it with several baseline models. In particular, a comparison with the GNN-based SafeDrug model demonstrates statistically significant improvements in the F1-score (p = 0.0387), Jaccard (p = 0.0112), and the DDI rate (p = 0.0386). These results demonstrate the potential of ELF-based 3D representations to enhance prediction accuracy and support safer combinatorial drug prescribing in clinical practice.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07905v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07905v1" target="_blank" rel="noopener noreferrer">
                SatFusion：一种通过多时序和多源数据融合增强卫星物联网图像的统一框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SatFusion: A Unified Framework for Enhancing Satellite IoT Images via Multi-Temporal and Multi-Source Data Fusion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yufei Tong, Guanjie Cheng, Peihan Wu, Yicheng Zhu, Kexu Lu, Feiyi Chen, Meng Xi,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于卫星图像处理和物联网数据融合，属于遥感技术领域。虽然涉及多源数据融合概念，但与推荐系统、搜索或广告的核心技术没有直接关联，也不涉及LLM、Transformer架构或异构数据建模在推荐领域的应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 07:59:37">2025-10-09 07:59:37</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07905v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07905v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">eess.IV</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                With the rapid advancement of the digital society, the proliferation of satellites in the Satellite Internet of Things (Sat-IoT) has led to the continuous accumulation of large-scale multi-temporal and multi-source images across diverse application scenarios. However, existing methods fail to fully exploit the complementary information embedded in both temporal and source dimensions. For example, Multi-Image Super-Resolution (MISR) enhances reconstruction quality by leveraging temporal complementarity across multiple observations, yet the limited fine-grained texture details in input images constrain its performance. Conversely, pansharpening integrates multi-source images by injecting high-frequency spatial information from panchromatic data, but typically relies on pre-interpolated low-resolution inputs and assumes noise-free alignment, making it highly sensitive to noise and misregistration. To address these issues, we propose SatFusion: A Unified Framework for Enhancing Satellite IoT Images via Multi-Temporal and Multi-Source Data Fusion. Specifically, SatFusion first employs a Multi-Temporal Image Fusion (MTIF) module to achieve deep feature alignment with the panchromatic image. Then, a Multi-Source Image Fusion (MSIF) module injects fine-grained texture information from the panchromatic data. Finally, a Fusion Composition module adaptively integrates the complementary advantages of both modalities while dynamically refining spectral consistency, supervised by a weighted combination of multiple loss functions. Extensive experiments on the WorldStrat, WV3, QB, and GF2 datasets demonstrate that SatFusion significantly improves fusion quality, robustness under challenging conditions, and generalizability to real-world Sat-IoT scenarios. The code is available at: https://github.com/dllgyufei/SatFusion.git.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07878v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07878v1" target="_blank" rel="noopener noreferrer">
                FlowLensing：使用流匹配模拟引力透镜效应
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FlowLensing: Simulating Gravitational Lensing with Flow Matching
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hamees Sayed, Pranath Reddy, Michael W. Toomey, Sergei Gleyzer
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文涉及物理学中的引力透镜模拟，使用流匹配技术解决物理模拟问题。该主题与推荐系统、搜索或广告领域没有任何直接或间接的联系，完全超出了关注范围。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 07:31:47">2025-10-09 07:31:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07878v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07878v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">astro-ph.IM</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. The LaTeX multiplication sign from the source abstract is written as a literal character. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Gravitational lensing is one of the most powerful probes of dark matter, yet creating high-fidelity lensed images at scale remains a bottleneck. Existing tools rely on ray-tracing or forward-modeling pipelines that, while precise, are prohibitively slow. We introduce FlowLensing, a Diffusion Transformer-based compact and efficient flow-matching model for strong gravitational lensing simulation. FlowLensing operates in both discrete and continuous regimes, handling classes such as different dark matter models as well as continuous model parameters ensuring physical consistency. By enabling scalable simulations, our model can advance dark matter studies, specifically for probing dark matter substructure in cosmological surveys. We find that our model achieves a speedup of over 200× compared to classical simulators for intensive dark matter models, with high fidelity and low inference latency. FlowLensing enables rapid, scalable, and physically consistent image synthesis, offering a practical alternative to traditional forward-modeling pipelines.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07871v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07871v1" target="_blank" rel="noopener noreferrer">
                小米EV-AD VLA团队：通过主动风险感知学习社会性导航——IROS 2025 RoboSense挑战赛社会导航赛道技术报告
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Team Xiaomi EV-AD VLA: Learning to Navigate Socially Through Proactive Risk Perception - Technical Report for IROS 2025 RoboSense Challenge Social Navigation Track
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Erjia Xiao, Lingfeng Zhang, Yingbo Tang, Hao Cheng, Renjing Xu, Wenbo Ding, Lei ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器人社会导航和主动风险感知，属于自动驾驶领域。虽然标题包含'VLA'（可能指视觉语言模型），但核心内容围绕机器人导航挑战赛，与推荐系统、搜索或广告没有任何直接或间接关联。该技术无法应用于RecSys/Search/Ads领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 07:22:12">2025-10-09 07:22:12</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07871v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07871v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this report, we describe the technical details of our submission to the IROS 2025 RoboSense Challenge Social Navigation Track. This track focuses on developing RGBD-based perception and navigation systems that enable autonomous agents to navigate safely, efficiently, and socially compliantly in dynamic human-populated indoor environments. The challenge requires agents to operate from an egocentric perspective using only onboard sensors including RGB-D observations and odometry, without access to global maps or privileged information, while maintaining social norm compliance such as safe distances and collision avoidance. Building upon the Falcon model, we introduce a Proactive Risk Perception Module to enhance social navigation performance. Our approach augments Falcon with collision risk understanding that learns to predict distance-based collision risk scores for surrounding humans, which enables the agent to develop more robust spatial awareness and proactive collision avoidance behaviors. The evaluation on the Social-HM3D benchmark demonstrates that our method improves the agent's ability to maintain personal space compliance while navigating toward goals in crowded indoor scenes with dynamic human agents, achieving 2nd place among 16 participating teams in the challenge.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07856v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07856v1" target="_blank" rel="noopener noreferrer">
                XYZCylinder：基于统一圆柱提升方法的驾驶场景前馈重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            XYZCylinder: Feedforward Reconstruction for Driving Scenes Based on A Unified Cylinder Lifting Method
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haochen Yu, Qiankun Liu, Hongyuan Liu, Jianfei Jiang, Juntao Lyu, Jiansheng Chen...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的驾驶场景重建，属于纯粹的视觉技术领域。虽然标题提到"重建"和"驾驶场景"，但这与推荐系统、搜索或广告的核心技术没有直接关联，也没有涉及LLM技术或Transformer架构的进展，无法看出在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 06:58:03">2025-10-09 06:58:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07856v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07856v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. LaTeX markup from the source abstract (degree math, bold, hyperlink) is expressed as HTML so it no longer shows as raw commands. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recently, more attention has been paid to feedforward reconstruction paradigms, which mainly learn a fixed view transformation implicitly and reconstruct the scene with a single representation. However, their generalization capability and reconstruction accuracy are still limited while reconstructing driving scenes, which results from two aspects: (1) The fixed view transformation fails when the camera configuration changes, limiting the generalization capability across different driving scenes equipped with different camera configurations. (2) The small overlapping regions between sparse views of the 360° panorama and the complexity of driving scenes increase the learning difficulty, reducing the reconstruction accuracy. To handle these difficulties, we propose <strong>XYZCylinder</strong>, a feedforward model based on a unified cylinder lifting method which involves camera modeling and feature lifting. Specifically, to improve the generalization capability, we design a Unified Cylinder Camera Modeling (UCCM) strategy, which avoids the learning of viewpoint-dependent spatial correspondence and unifies different camera configurations with adjustable parameters. To improve the reconstruction accuracy, we propose a hybrid representation with several dedicated modules based on newly designed Cylinder Plane Feature Group (CPFG) to lift 2D image features to 3D space. Experimental results show that XYZCylinder achieves state-of-the-art performance under different evaluation settings, and can be generalized to other driving scenes in a zero-shot manner. Project page: <a href="https://yuyuyu223.github.io/XYZCYlinder-projectpage/" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">https://yuyuyu223.github.io/XYZCYlinder-projectpage/</a>.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.07853v1; the page's inline stylesheet sets .collapsed-level-2 to display: none !important. -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07853v1" target="_blank" rel="noopener noreferrer">
                用于测试新化学品和材料毒性的平台的自监督学习策略
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title; lang marks the language switch inside the zh-CN document. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Self-Supervised Learning Strategies for a Platform to Test the Toxicity of New Chemicals and Materials
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Thomas Lautenschlager, Nils Friederich, Angelo Jovin Yamachui Sitcheu, Katja Nau...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于化学和材料毒性测试领域，这属于明确的无关主题范畴（医学、生物学、化学或其他领域特定应用）。尽管提到了自监督学习，但核心应用场景与推荐系统、搜索或广告完全无关，且没有显示出任何在这些领域应用的潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-09 06:51:12">2025-10-09 06:51:12</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.07853v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.07853v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                High-throughput toxicity testing offers a fast and cost-effective way to test large amounts of compounds. A key component for such systems is the automated evaluation via machine learning models. In this paper, we address critical challenges in this domain and demonstrate how representations learned via self-supervised learning can effectively identify toxicant-induced changes. We provide a proof-of-concept that utilizes the publicly available EmbryoNet dataset, which contains ten zebrafish embryo phenotypes elicited by various chemical compounds targeting different processes in early embryonic development. Our analysis shows that the learned representations using self-supervised learning are suitable for effectively distinguishing between the modes-of-action of different compounds. Finally, we discuss the integration of machine learning models in a physical toxicity testing device in the context of the TOXBOX project.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07837v1" target="_blank" rel="noopener noreferrer">
                IsoSignVid2Aud：无需文本中介的手语视频到音频转换
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            IsoSignVid2Aud: Sign Language Video to Audio Conversion without Text Intermediaries
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Harsh Kavediya, Vighnesh Nayak, Bheeshm Sharma, Balamurugan Palaniappan
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手语视频到音频的模态转换技术，属于计算机视觉和语音处理的交叉领域。虽然涉及多模态处理，但与推荐系统、搜索或广告的核心技术栈没有直接关联，也不涉及Transformer架构改进或LLM技术在推荐领域的应用。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 06:29:59
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07837v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07837v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.MM</span><span class="category-tag">cs.SD</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Sign language to spoken language audio translation is important to connect the hearing- and speech-challenged humans with others. We consider sign language videos with isolated sign sequences rather than continuous grammatical signing. Such videos are useful in educational applications and sign prompt interfaces. Towards this, we propose IsoSignVid2Aud, a novel end-to-end framework that translates sign language videos with a sequence of possibly non-grammatic continuous signs to speech without requiring intermediate text representation, providing immediate communication benefits while avoiding the latency and cascading errors inherent in multi-stage translation systems. Our approach combines an I3D-based feature extraction module with a specialized feature transformation network and an audio generation pipeline, utilizing a novel Non-Maximal Suppression (NMS) algorithm for the temporal detection of signs in non-grammatic continuous sequences. Experimental results demonstrate competitive performance on ASL-Citizen-1500 and WLASL-100 datasets with Top-1 accuracies of 72.01% and 78.67%, respectively, and audio quality metrics (PESQ: 2.67, STOI: 0.73) indicating intelligible speech output. Code is available at: https://github.com/BheeshmSharma/IsoSignVid2Aud_AIMLsystems-2025.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07830v1" target="_blank" rel="noopener noreferrer">
                PrismGS：面向高保真大规模3D高斯泼溅的物理基础抗锯齿技术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            PrismGS: Physically-Grounded Anti-Aliasing for High-Fidelity Large-Scale 3D Gaussian Splatting
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Houqiang Zhong, Zhenglong Wu, Sihua Fu, Zihan Zheng, Xin Jin, Xiaoyun Zhang, Li ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D计算机图形学中的渲染技术（3D高斯泼溅和抗锯齿），属于纯粹的视觉/图形学领域。虽然标题提到'大规模'，但内容与推荐系统、搜索或广告的核心技术栈（排序、召回、用户建模等）无直接关联，且未提及任何潜在的跨模态应用场景。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 06:21:45
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07830v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07830v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    3D Gaussian Splatting (3DGS) has recently enabled real-time photorealistic rendering in compact scenes, but scaling to large urban environments introduces severe aliasing artifacts and optimization instability, especially under high-resolution (e.g., 4K) rendering. These artifacts, manifesting as flickering textures and jagged edges, arise from the mismatch between Gaussian primitives and the multi-scale nature of urban geometry. While existing “divide-and-conquer” pipelines address scalability, they fail to resolve this fidelity gap. In this paper, we propose PrismGS, a physically-grounded regularization framework that improves the intrinsic rendering behavior of 3D Gaussians. PrismGS integrates two synergistic regularizers. The first is pyramidal multi-scale supervision, which enforces consistency by supervising the rendering against a pre-filtered image pyramid. This compels the model to learn an inherently anti-aliased representation that remains coherent across different viewing scales, directly mitigating flickering textures. This is complemented by an explicit size regularization that imposes a physically-grounded lower bound on the dimensions of the 3D Gaussians. This prevents the formation of degenerate, view-dependent primitives, leading to more stable and plausible geometric surfaces and reducing jagged edges. Our method is plug-and-play and compatible with existing pipelines. Extensive experiments on MatrixCity, Mill-19, and UrbanScene3D demonstrate that PrismGS achieves state-of-the-art performance, yielding significant PSNR gains around 1.5 dB against CityGaussian, while maintaining its superior quality and robustness under demanding 4K rendering.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07823v1" target="_blank" rel="noopener noreferrer">
                通过扩展变换空间与缓解过拟合增强视觉提示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Enhancing Visual Prompting through Expanded Transformation Space and Overfitting Mitigation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shohei Enomoto
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉提示技术，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告的核心技术无关。虽然视觉语言模型被列为关注领域，但该论文仅涉及视觉提示的改进，并未涉及多模态建模或异构数据处理，因此对当前关注点没有实际应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 06:08:15
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07823v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07823v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Visual prompting (VP) has emerged as a promising parameter-efficient fine-tuning approach for adapting pre-trained vision models to downstream tasks without modifying model parameters. Despite offering advantages like negligible computational overhead and compatibility with black-box models, conventional VP methods typically achieve lower accuracy than other adaptation approaches. Our analysis reveals two critical limitations: the restricted expressivity of simple additive transformation and a tendency toward overfitting when the parameter count increases. To address these challenges, we propose ACAVP (Affine, Color, and Additive Visual Prompting), which enhances VP's expressive power by introducing complementary transformation operations: affine transformation for creating task-specific prompt regions while preserving original image information, and color transformation for emphasizing task-relevant visual features. Additionally, we identify that overfitting is a critical issue in VP training and introduce TrivialAugment as an effective data augmentation, which not only benefits our approach but also significantly improves existing VP methods, with performance gains of up to 12 percentage points on certain datasets. This demonstrates that appropriate data augmentation is universally beneficial for VP training. Extensive experiments across twelve diverse image classification datasets with two different model architectures demonstrate that ACAVP achieves state-of-the-art accuracy among VP methods, surpasses linear probing in average accuracy, and exhibits superior robustness to distribution shifts, all while maintaining minimal computational overhead during inference.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07810v1" target="_blank" rel="noopener noreferrer">
                FMANet：一种用于鲁棒微表情识别的融合运动注意力网络的新型双阶段光流方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            FMANet: A Novel Dual-Phase Optical Flow Approach with Fusion Motion Attention Network for Robust Micro-expression Recognition
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Luu Tu Nguyen, Vu Tram Anh Khuong, Thi Bich Phuong Man, Thi Duyen Ngo, Thanh Ha ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于微表情识别这一计算机视觉特定领域，使用光流和注意力机制进行面部微表情分析。这与搜索、推荐或广告系统的核心需求没有直接关联，也不涉及LLM技术、Transformer架构进展或异构数据统一建模等关键焦点领域。微表情识别主要应用于心理学、人机交互等场景，与RecSys/Search/Ads的技术栈和应用场景相距甚远。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 05:36:40
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07810v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07810v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Facial micro-expressions, characterized by their subtle and brief nature, are valuable indicators of genuine emotions. Despite their significance in psychology, security, and behavioral analysis, micro-expression recognition remains challenging due to the difficulty of capturing subtle facial movements. Optical flow has been widely employed as an input modality for this task due to its effectiveness. However, most existing methods compute optical flow only between the onset and apex frames, thereby overlooking essential motion information in the apex-to-offset phase. To address this limitation, we first introduce a comprehensive motion representation, termed Magnitude-Modulated Combined Optical Flow (MM-COF), which integrates motion dynamics from both micro-expression phases into a unified descriptor suitable for direct use in recognition networks. Building upon this principle, we then propose FMANet, a novel end-to-end neural network architecture that internalizes the dual-phase analysis and magnitude modulation into learnable modules. This allows the network to adaptively fuse motion cues and focus on salient facial regions for classification. Experimental evaluations on the MMEW, SMIC, CASME-II, and SAMM datasets, widely recognized as standard benchmarks, demonstrate that our proposed MM-COF representation and FMANet outperforms existing methods, underscoring the potential of a learnable, dual-phase framework in advancing micro-expression recognition.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07785v1" target="_blank" rel="noopener noreferrer">
                基于3D UNet和可解释人工智能（XAI）的深度学习脑肿瘤分割技术解析：一项对比分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Demystifying Deep Learning-based Brain Tumor Segmentation with 3D UNets and Explainable AI (XAI): A Comparative Analysis
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Ming Jie Ong, Sze Yinn Ung, Sim Kuan Goh, Jimmy Y. Zhong
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像领域的脑肿瘤分割，使用3D UNet和可解释AI技术，属于明确的医学/生物学应用范畴。根据用户要求，医学、生物学等特定领域应用属于不相关主题，且该技术没有明显的推荐系统、搜索或广告应用潜力。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 05:03:31
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07785v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07785v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    The current study investigated the use of Explainable Artificial Intelligence (XAI) to improve the accuracy of brain tumor segmentation in MRI images, with the goal of assisting physicians in clinical decision-making. The study focused on applying UNet models for brain tumor segmentation and using the XAI techniques of Gradient-weighted Class Activation Mapping (Grad-CAM) and attention-based visualization to enhance the understanding of these models. Three deep learning models - UNet, Residual UNet (ResUNet), and Attention UNet (AttUNet) - were evaluated to identify the best-performing model. XAI was employed with the aims of clarifying model decisions and increasing physicians' trust in these models. We compared the performance of two UNet variants (ResUNet and AttUNet) with the conventional UNet in segmenting brain tumors from the BraTS2020 public dataset and analyzed model predictions with Grad-CAM and attention-based visualization. Using the latest computer hardware, we trained and validated each model using the Adam optimizer and assessed their performance with respect to: (i) training, validation, and inference times, (ii) segmentation similarity coefficients and loss functions, and (iii) classification performance. Notably, during the final testing phase, ResUNet outperformed the other models with respect to Dice and Jaccard similarity scores, as well as accuracy, recall, and F1 scores. Grad-CAM provided visuospatial insights into the tumor subregions each UNet model focused on while attention-based visualization provided valuable insights into the working mechanisms of AttUNet's attention modules. These results demonstrated ResUNet as the best-performing model and we conclude by recommending its use for automated brain tumor segmentation in future clinical assessments. Our source code and checkpoint are available at https://github.com/ethanong98/MultiModel-XAI-Brats2020
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.07752v1" target="_blank" rel="noopener noreferrer">
                DEGS：基于可变形事件的三维高斯泼溅，从RGB和事件流中重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            DEGS: Deformable Event-based 3D Gaussian Splatting from RGB and Event Stream
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Junhao He, Jiaxu Wang, Jia Li, Mingyuan Sun, Qiang Zhang, Jiahang Cao, Ziyi Zhan...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于事件相机和3D重建的计算机视觉技术，属于纯粹的视觉领域研究。标题中提到的3D高斯泼溅、事件流和RGB输入都是视觉感知和重建的特定技术，没有显示出与推荐系统、搜索或广告领域的任何潜在关联。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-09 03:43:27
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.07752v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.07752v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Reconstructing Dynamic 3D Gaussian Splatting (3DGS) from low-framerate RGB videos is challenging. This is because large inter-frame motions will increase the uncertainty of the solution space. For example, one pixel in the first frame might have more choices to reach the corresponding pixel in the second frame. Event cameras can asynchronously capture rapid visual changes and are robust to motion blur, but they do not provide color information. Intuitively, the event stream can provide deterministic constraints for the inter-frame large motion by the event trajectories. Hence, combining low-temporal-resolution images with high-framerate event streams can address this challenge. However, it is challenging to jointly optimize Dynamic 3DGS using both RGB and event modalities due to the significant discrepancy between these two data modalities. This paper introduces a novel framework that jointly optimizes dynamic 3DGS from the two modalities. The key idea is to adopt event motion priors to guide the optimization of the deformation fields. First, we extract the motion priors encoded in event streams by using the proposed LoCM unsupervised fine-tuning framework to adapt an event flow estimator to a certain unseen scene. Then, we present the geometry-aware data association method to build the event-Gaussian motion correspondence, which is the primary foundation of the pipeline, accompanied by two useful strategies, namely motion decomposition and inter-frame pseudo-label. Extensive experiments show that our method outperforms existing image and event-based approaches across synthetic and real scenes and prove that our method can effectively optimize dynamic 3DGS with the help of event data.
                </div>
            </details>
    </div>
</div>
        </div>
    </main>

    <!-- 加载论文数据和JavaScript逻辑 -->
    <script src="static/app.js"></script>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // Wires up the page's collapse/filter controls once the DOM is parsed:
            //   1. a bulk expand/collapse toggle inserted before the first non-featured card,
            //   2. a divider whose label reveals the low-score (.collapsed-level-2) cards,
            //   3. per-card title/icon toggles for .collapsed-level-1 cards,
            //   4. the "show selected" / "show all" filter buttons.

            // Class strings for the per-card indicator icon (open eye = expanded).
            const ICON_EXPANDED = 'expand-icon fa fa-eye cursor-pointer';
            const ICON_COLLAPSED = 'expand-icon fa fa-eye-slash cursor-pointer';

            // Class strings for the active / inactive filter buttons.
            const FILTER_ACTIVE = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
            const FILTER_INACTIVE = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';

            const papersContainer = document.querySelector('#papers-container');
            const showSelectedButton = document.getElementById('show-selected');
            const showAllButton = document.getElementById('show-all');

            // Sets the indicator icon of one card to the expanded or collapsed glyph.
            function setIcon(paper, expanded) {
                const icon = paper.querySelector('.expand-icon');
                if (icon) {
                    icon.className = expanded ? ICON_EXPANDED : ICON_COLLAPSED;
                }
            }

            // Flips the visibility of one card's details and keeps its icon in sync.
            function toggleDetails(paper) {
                const details = paper.querySelector('.paper-details');
                if (!details) return;

                // Read the computed style, not the inline one: the details may be
                // visible because of the container's `expanded-all` class, in which
                // case the inline `display` is still empty.
                const isExpanded = window.getComputedStyle(details).display !== 'none';
                details.style.display = isExpanded ? 'none' : 'block';
                setIcon(paper, !isExpanded);
            }

            // Drops per-card inline overrides so the class-based rules decide
            // visibility again, and sets every icon to the given bulk state.
            function resetCollapsedPapers(scope, expanded) {
                scope.querySelectorAll('.collapsed-level-1').forEach(paper => {
                    const details = paper.querySelector('.paper-details');
                    if (details) details.style.display = '';
                    setIcon(paper, expanded);
                });
            }

            // Makes a non-button element behave like a button: click plus
            // Enter/Space activation, focusable, and announced as a button.
            function makeActivatable(element, onActivate) {
                element.setAttribute('role', 'button');
                element.setAttribute('tabindex', '0');
                element.addEventListener('click', onActivate);
                element.addEventListener('keydown', function(e) {
                    if (e.key === 'Enter' || e.key === ' ') {
                        e.preventDefault(); // keep Space from scrolling the page
                        onActivate.call(element, e);
                    }
                });
            }

            if (papersContainer) {
                // Bulk expand/collapse toggle for all non-featured cards.
                const expandAllButton = document.createElement('div');
                expandAllButton.className = 'expand-toggle';
                expandAllButton.textContent = '展开/折叠全部非精选论文';
                makeActivatable(expandAllButton, function() {
                    const expanded = papersContainer.classList.toggle('expanded-all');
                    this.textContent = expanded ? '收起全部非精选论文' : '展开全部非精选论文';

                    // Clear individual overrides and refresh every card's icon so
                    // icon and visibility agree after a bulk toggle.
                    resetCollapsedPapers(papersContainer, expanded);
                });

                // Insert the toggle right before the first non-featured card. Use the
                // card's own parent: insertBefore throws if the reference node is not
                // a direct child of the node it is called on.
                const firstNormalPaper = papersContainer.querySelector('.simple-paper-card');
                if (firstNormalPaper) {
                    firstNormalPaper.parentNode.insertBefore(expandAllButton, firstNormalPaper);
                }

                // Divider whose label reveals/hides the low-score cards.
                const divider = document.createElement('div');
                divider.className = 'papers-divider';

                const dividerLabel = document.createElement('div');
                dividerLabel.className = 'papers-divider-label';
                dividerLabel.textContent = '点击展开更多论文（评分较低）';
                makeActivatable(dividerLabel, function() {
                    const expanded = papersContainer.classList.toggle('expanded-level-2');
                    this.textContent = expanded ? '点击收起低分论文' : '点击展开更多论文（评分较低）';
                });

                divider.appendChild(dividerLabel);

                // Place the divider after the last non-featured card.
                const normalPapers = papersContainer.querySelectorAll('.simple-paper-card');
                if (normalPapers.length > 0) {
                    const lastNormalPaper = normalPapers[normalPapers.length - 1];
                    lastNormalPaper.parentNode.insertBefore(divider, lastNormalPaper.nextSibling);
                }
            }

            // Per-card toggles: clicking the title (outside its link) or the icon
            // expands/collapses that card's details.
            document.querySelectorAll('.collapsed-level-1').forEach(paper => {
                const titleElement = paper.querySelector('h3');
                if (!titleElement) return;

                titleElement.style.cursor = 'pointer';

                // The icon is only added when the title contains a link; it is
                // inserted as a sibling before that link.
                const linkElement = titleElement.querySelector('a');
                if (linkElement) {
                    const iconElement = document.createElement('i');
                    iconElement.className = ICON_COLLAPSED;
                    iconElement.style.marginRight = '8px';
                    titleElement.insertBefore(iconElement, linkElement);

                    iconElement.addEventListener('click', function(e) {
                        e.stopPropagation(); // do not also trigger the title handler
                        toggleDetails(paper);
                    });
                }

                titleElement.addEventListener('click', function(e) {
                    // Ignore clicks on the link (navigation) and on the icon
                    // (handled by its own listener above).
                    if (e.target.closest('a') || e.target.closest('.expand-icon')) return;
                    toggleDetails(paper);
                });
            });

            // "Show selected only" filter: featured cards visible, others hidden.
            if (showSelectedButton) {
                showSelectedButton.addEventListener('click', function() {
                    const selectedPapers = document.querySelectorAll('.paper-card');
                    const normalPapers = document.querySelectorAll('.simple-paper-card');

                    selectedPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });

                    normalPapers.forEach(paper => {
                        paper.style.display = 'none';
                    });

                    // Update the visible/total counter.
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${selectedPapers.length} 篇论文 (共 ${selectedPapers.length + normalPapers.length} 篇)`;
                    }

                    // Swap the active/inactive button styling.
                    this.className = FILTER_ACTIVE;
                    if (showAllButton) showAllButton.className = FILTER_INACTIVE;

                    // The bulk toggle and divider only apply to non-featured cards.
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) expandToggle.style.display = 'none';

                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'none';
                });
            }

            // "Show all" filter: every card visible again, collapse state reset.
            if (showAllButton) {
                showAllButton.addEventListener('click', function() {
                    const allPapers = document.querySelectorAll('.paper-card, .simple-paper-card');
                    allPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });

                    // Reset the collapse state. The container may be absent, so
                    // guard before touching it; icons and inline overrides are
                    // reset too so they match the collapsed state and the toggle
                    // text restored below.
                    if (papersContainer) {
                        papersContainer.classList.remove('expanded-all');
                    }
                    resetCollapsedPapers(document, false);

                    // Update the visible/total counter.
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${allPapers.length} 篇论文 (共 ${allPapers.length} 篇)`;
                    }

                    // Swap the active/inactive button styling.
                    this.className = FILTER_ACTIVE;
                    if (showSelectedButton) showSelectedButton.className = FILTER_INACTIVE;

                    // Bring back the bulk toggle and the divider.
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) {
                        expandToggle.style.display = 'block';
                        expandToggle.textContent = '展开全部非精选论文';
                    }

                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'block';
                });
            }
        });
    </script>
    <script>
    
    // Start the calendar widget once the document has been parsed. Any error
    // thrown by initCalendar() is logged instead of propagating.
    document.addEventListener('DOMContentLoaded', function onDomReady() {
        console.log('Attempting to initialize calendar...');
        try {
            initCalendar();
        } catch (error) {
            console.error('Error initializing calendar:', error);
        }
    });
    
    /**
     * Set up the date-picker calendar.
     *
     * Reads the page's date from #current-date, shows it in #selected-date-text,
     * wires the toggle, outside-click, previous-month and next-month controls,
     * and renders a Sunday-first month grid into #calendar-grid. Only dates
     * listed in `availableDates` are clickable; clicking one stores the
     * selection in localStorage and navigates to `arxiv_YYYYMMDD.html`.
     *
     * The date of the page being viewed is the source of truth for the selected
     * date and the initially displayed month. Values saved in localStorage are
     * only a fallback when the page date cannot be parsed, so opening a day's
     * page directly never shows a stale selection from an earlier visit.
     *
     * Takes no arguments and returns nothing. If the calendar markup is missing
     * it logs a warning and returns without attaching any listener.
     */
    function initCalendar() {
        const toggleBtn = document.getElementById('date-picker-toggle');
        const datePicker = document.getElementById('date-picker');
        const calendarGrid = document.getElementById('calendar-grid');
        const prevMonthBtn = document.getElementById('prev-month');
        const nextMonthBtn = document.getElementById('next-month');
        const currentMonthEl = document.getElementById('current-month');
        const selectedDateText = document.getElementById('selected-date-text');
        const currentDateEl = document.getElementById('current-date');

        // Without these elements there is nothing to wire up; bail out with a
        // clear message rather than a null-dereference TypeError.
        const requiredElements = [toggleBtn, datePicker, calendarGrid, prevMonthBtn, nextMonthBtn, currentMonthEl, selectedDateText];
        if (requiredElements.some((el) => !el)) {
            console.warn('Calendar elements not found; skipping calendar initialization.');
            return;
        }

        const pad2 = (value) => String(value).padStart(2, '0');

        // Extract year / month / day from text such as "2025年10月24日",
        // "2025-10-24", "2025/10/24" or "20251024". Returns
        // { year, month (0-based), day } or null when no valid calendar date is
        // found. Building the Date from numeric components keeps it in local
        // time; parsing an ISO string would treat it as UTC and could shift the
        // local year or month.
        const parsePageDate = (text) => {
            const match = /(\d{4})\D*(\d{1,2})\D*(\d{1,2})/.exec(text || '');
            if (!match) return null;
            const year = Number(match[1]);
            const month = Number(match[2]) - 1;
            const day = Number(match[3]);
            const probe = new Date(year, month, day);
            // Date silently rolls over out-of-range parts (e.g. month 13), so
            // accept the result only if it round-trips unchanged.
            if (probe.getFullYear() !== year || probe.getMonth() !== month || probe.getDate() !== day) {
                return null;
            }
            return { year, month, day };
        };

        // localStorage access can throw (e.g. when storage is blocked), so all
        // persistence is best-effort and must never abort initialization.
        const readStored = (key) => {
            try {
                return localStorage.getItem(key);
            } catch (error) {
                return null;
            }
        };
        const writeStored = (key, value) => {
            try {
                localStorage.setItem(key, value);
            } catch (error) {
                // Storage unavailable: the calendar still works, just without persistence.
            }
        };
        const rememberSelection = (dateText, year, month) => {
            writeStored('selectedDate', dateText);
            writeStored('selectedYear', String(year));
            writeStored('selectedMonth', String(month));
        };

        // Default to the current month; overridden below by the page date or,
        // failing that, by a previously saved selection.
        const now = new Date();
        let displayYear = now.getFullYear();
        let displayMonth = now.getMonth();

        const pageDate = parsePageDate(currentDateEl ? currentDateEl.textContent : '');
        if (pageDate) {
            const pageDateText = pageDate.year + '-' + pad2(pageDate.month + 1) + '-' + pad2(pageDate.day);
            selectedDateText.textContent = pageDateText;
            displayYear = pageDate.year;
            displayMonth = pageDate.month;
            rememberSelection(pageDateText, pageDate.year, pageDate.month);
        } else {
            const savedDate = readStored('selectedDate');
            const savedYear = parseInt(readStored('selectedYear'), 10);
            const savedMonth = parseInt(readStored('selectedMonth'), 10);
            if (savedDate) selectedDateText.textContent = savedDate;
            // Ignore corrupted values instead of rendering "NaN年undefined".
            if (Number.isInteger(savedYear)) displayYear = savedYear;
            if (Number.isInteger(savedMonth) && savedMonth >= 0 && savedMonth <= 11) displayMonth = savedMonth;
        }

        // Dates (YYYYMMDD) for which a paper page exists.
        const availableDates = new Set(["20251014","20251022","20251023","20251015","20251024","20251009","20251010","20251016","20251021","20251017"]);

        // Toggle the calendar popup; render lazily each time it is opened.
        toggleBtn.addEventListener('click', (e) => {
            // Keep this click from reaching the document-level "close" handler.
            e.stopPropagation();

            if (datePicker.classList.contains('hidden')) {
                datePicker.classList.remove('hidden');
                renderCalendar();
            } else {
                datePicker.classList.add('hidden');
            }
        });

        // Clicking anywhere else on the page closes the calendar.
        document.addEventListener('click', () => {
            if (!datePicker.classList.contains('hidden')) {
                datePicker.classList.add('hidden');
            }
        });

        // Clicks inside the calendar must not bubble up and close it.
        datePicker.addEventListener('click', (e) => {
            e.stopPropagation();
        });

        // Previous / next month navigation with year wrap-around.
        prevMonthBtn.addEventListener('click', () => {
            displayMonth--;
            if (displayMonth < 0) {
                displayMonth = 11;
                displayYear--;
            }
            renderCalendar();
        });

        nextMonthBtn.addEventListener('click', () => {
            displayMonth++;
            if (displayMonth > 11) {
                displayMonth = 0;
                displayYear++;
            }
            renderCalendar();
        });

        /**
         * Rebuild the month grid for displayYear / displayMonth.
         */
        function renderCalendar() {
            calendarGrid.innerHTML = '';

            const monthNames = ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月'];
            currentMonthEl.textContent = displayYear + '年' + monthNames[displayMonth];

            // Weekday of the 1st (0 = Sunday) gives the number of leading blanks.
            const firstDayOfWeek = new Date(displayYear, displayMonth, 1).getDay();

            // Day 0 of the following month is the last day of this month.
            const daysInMonth = new Date(displayYear, displayMonth + 1, 0).getDate();

            for (let i = 0; i < firstDayOfWeek; i++) {
                const emptyDay = document.createElement('div');
                emptyDay.classList.add('py-1', 'text-gray-300');
                calendarGrid.appendChild(emptyDay);
            }

            // Midnight today, used to highlight the current day.
            const today = new Date();
            today.setHours(0, 0, 0, 0);

            for (let day = 1; day <= daysInMonth; day++) {
                const dayElement = document.createElement('div');
                const currentDateObj = new Date(displayYear, displayMonth, day);
                const isToday = currentDateObj.getTime() === today.getTime();
                // dateStr is the compact form used in file names; displayDateStr
                // is the dashed form shown to the user and stored.
                const dateStr = displayYear + pad2(displayMonth + 1) + pad2(day);
                const displayDateStr = displayYear + '-' + pad2(displayMonth + 1) + '-' + pad2(day);

                dayElement.textContent = day;

                const hasPapers = availableDates.has(dateStr);

                if (hasPapers) {
                    dayElement.classList.add('py-1', 'cursor-pointer', 'hover:bg-gray-100', 'rounded', 'bg-blue-50', 'font-medium');

                    // Clicking an available date records the selection and
                    // navigates to that day's page.
                    dayElement.addEventListener('click', () => {
                        console.log('Date clicked:', displayDateStr);
                        selectedDateText.textContent = displayDateStr;

                        rememberSelection(displayDateStr, displayYear, displayMonth);

                        datePicker.classList.add('hidden');

                        const targetUrl = 'arxiv_' + dateStr + '.html';
                        window.location.href = targetUrl;
                    });
                } else {
                    // Dates without papers are greyed out and get no click handler.
                    dayElement.classList.add('py-1', 'text-gray-400', 'cursor-not-allowed');
                }

                // Highlight today's date.
                if (isToday) {
                    dayElement.classList.remove('bg-blue-50');
                    dayElement.classList.add('bg-primary', 'text-white', 'font-bold', 'shadow');
                    if (!hasPapers) {
                        // Today without papers: keep the highlight but dim it.
                        dayElement.classList.add('opacity-70');
                    }
                }

                // Outline the currently selected date.
                if (displayDateStr === selectedDateText.textContent) {
                    dayElement.classList.add('font-bold', 'border-2', 'border-primary', 'rounded-lg', 'shadow-md');
                }

                // Make dates with papers stand out more (except today, which
                // already has its own highlight).
                if (hasPapers && !isToday) {
                    dayElement.classList.add('bg-blue-100', 'hover:bg-blue-200', 'transition-colors', 'duration-200');
                }

                calendarGrid.appendChild(dayElement);
            }
        }
    }
    </script>
    </body>

</html>